From 2af5f375a639eacc85621435d715a763cb391dc0 Mon Sep 17 00:00:00 2001 From: zxstty Date: Mon, 12 May 2025 23:14:12 +0800 Subject: [PATCH 01/15] tj --- data_chain/apps/app.py | 187 ++--- data_chain/apps/base/convertor.py | 627 ++++++++++++++++ .../apps/base/convertor/chunk_convertor.py | 15 - .../apps/base/convertor/document_convertor.py | 22 - .../base/convertor/knowledge_convertor.py | 44 -- .../apps/base/convertor/model_convertor.py | 36 - .../apps/base/convertor/task_convertor.py | 27 - data_chain/apps/base/session/session.py | 128 ---- .../apps/base/task/document_task_handler.py | 233 ------ .../base/task/knowledge_base_task_handler.py | 324 -------- data_chain/apps/base/task/message_queue.py | 33 + data_chain/apps/base/task/process_handler.py | 67 ++ data_chain/apps/base/task/task_handler.py | 179 ----- .../base/task/worker/acc_testing_worker.py | 452 +++++++++++ .../apps/base/task/worker/base_worker.py | 107 +++ .../base/task/worker/export_dataset_worker.py | 207 ++++++ .../worker/export_knowledge_base_worker.py | 229 ++++++ .../task/worker/generate_dataset_worker.py | 264 +++++++ .../base/task/worker/import_dataset_worker.py | 278 +++++++ .../worker/import_knowledge_base_worker.py | 254 +++++++ .../base/task/worker/parse_document_worker.py | 506 +++++++++++++ .../apps/base/{document => }/zip_handler.py | 55 +- data_chain/apps/router/acc_testing.py | 128 ++++ data_chain/apps/router/chunk.py | 121 ++- data_chain/apps/router/dataset.py | 186 +++++ data_chain/apps/router/document.py | 323 +++----- data_chain/apps/router/health_check.py | 2 +- data_chain/apps/router/knowledge_base.py | 466 ++++-------- data_chain/apps/router/model.py | 51 -- data_chain/apps/router/other.py | 84 ++- data_chain/apps/router/role.py | 58 ++ data_chain/apps/router/task.py | 57 ++ data_chain/apps/router/team.py | 132 ++++ data_chain/apps/router/user.py | 178 +---- data_chain/apps/router/usr_message.py | 37 + .../apps/service/acc_testing_service.py | 203 +++++ 
data_chain/apps/service/chunk_service.py | 439 ++--------- data_chain/apps/service/dataset_service.py | 337 +++++++++ data_chain/apps/service/document_service.py | 634 ++++++---------- .../apps/service/knwoledge_base_service.py | 578 ++++++++------- data_chain/apps/service/router_service.py | 12 + data_chain/apps/service/session_service.py | 54 ++ data_chain/apps/service/task_queue_service.py | 116 +++ data_chain/apps/service/task_service.py | 181 +++-- data_chain/apps/service/team_service.py | 102 +++ data_chain/apps/service/user_service.py | 96 --- data_chain/common/.env.example | 78 +- data_chain/common/pp.py | 57 +- data_chain/common/prompt.yaml | 313 +++++++- data_chain/config/config.py | 65 +- .../embedding.py} | 22 +- data_chain/entities/common.py | 350 +++++++++ data_chain/entities/enum.py | 200 +++++ data_chain/entities/request_data.py | 246 ++++++ data_chain/entities/response_data.py | 654 ++++++++++++++++ data_chain/exceptions/err_code.py | 35 - data_chain/exceptions/exception.py | 20 - data_chain/llm/icon/ollama.svg | 1 + data_chain/{apps/base/model => llm}/llm.py | 38 +- data_chain/manager/chunk_manager.py | 701 ++++++------------ data_chain/manager/dataset_manager.py | 216 ++++++ data_chain/manager/document_manager.py | 427 +++++------ data_chain/manager/document_type_manager.py | 146 ++-- data_chain/manager/image_manager.py | 119 +-- data_chain/manager/knowledge_manager.py | 217 +++--- data_chain/manager/qa_manager.py | 158 ++++ data_chain/manager/role_manager.py | 91 +++ data_chain/manager/session_manager.py | 37 + data_chain/manager/task_manager.py | 386 ++++------ data_chain/manager/task_queue_mamanger.py | 61 ++ data_chain/manager/task_report_manager.py | 74 ++ data_chain/manager/team_manager.py | 152 ++++ data_chain/manager/testcase_manager.py | 74 ++ data_chain/manager/testing_manager.py | 151 ++++ data_chain/manager/touch | 0 data_chain/manager/user_manager.py | 106 +-- data_chain/manager/vector_items_manager.py | 201 ----- 
data_chain/models/api.py | 240 ------ data_chain/models/constant.py | 95 --- data_chain/models/service.py | 88 --- data_chain/parser/handler/base_parser.py | 534 +------------ data_chain/parser/handler/doc_parser.py | 65 +- data_chain/parser/handler/docx_parser.py | 212 +++--- data_chain/parser/handler/html_parser.py | 234 ++++-- data_chain/parser/handler/json_parser.py | 34 + data_chain/parser/handler/md_parser.py | 213 ++++-- data_chain/parser/handler/md_zip_parser.py | 205 +++++ data_chain/parser/handler/pdf_parser.py | 392 +++++----- data_chain/parser/handler/pptx_parser.py | 122 +-- data_chain/parser/handler/txt_parser.py | 61 +- data_chain/parser/handler/xlsx_parser.py | 62 +- data_chain/parser/handler/yaml_parser.py | 33 + data_chain/parser/parse_result.py | 26 + data_chain/parser/service/parser_service.py | 247 ------ data_chain/parser/tools/image_tool.py | 16 + data_chain/parser/tools/ocr.py | 153 ---- data_chain/parser/tools/ocr_tool.py | 89 +++ data_chain/parser/tools/split.py | 20 - data_chain/parser/tools/token_tool.py | 507 +++++++++++++ data_chain/rag/base_searcher.py | 48 ++ data_chain/stores/database/database.py | 564 ++++++++++++++ data_chain/stores/minio/minio.py | 62 +- data_chain/stores/mongodb/mongodb.py | 74 ++ data_chain/stores/postgres/postgres.py | 381 ---------- data_chain/stores/redis/redis.py | 12 +- example.html | 113 +++ example.md | 21 + example.zip | Bin 0 -> 404 bytes fastapi_openapi.yaml | 0 openapi.json | 1 + temp/example.md | 21 + test.html | 14 + 112 files changed, 11904 insertions(+), 7300 deletions(-) create mode 100644 data_chain/apps/base/convertor.py delete mode 100644 data_chain/apps/base/convertor/chunk_convertor.py delete mode 100644 data_chain/apps/base/convertor/document_convertor.py delete mode 100644 data_chain/apps/base/convertor/knowledge_convertor.py delete mode 100644 data_chain/apps/base/convertor/model_convertor.py delete mode 100644 data_chain/apps/base/convertor/task_convertor.py delete mode 100644 
data_chain/apps/base/session/session.py delete mode 100644 data_chain/apps/base/task/document_task_handler.py delete mode 100644 data_chain/apps/base/task/knowledge_base_task_handler.py create mode 100644 data_chain/apps/base/task/message_queue.py create mode 100644 data_chain/apps/base/task/process_handler.py delete mode 100644 data_chain/apps/base/task/task_handler.py create mode 100644 data_chain/apps/base/task/worker/acc_testing_worker.py create mode 100644 data_chain/apps/base/task/worker/base_worker.py create mode 100644 data_chain/apps/base/task/worker/export_dataset_worker.py create mode 100644 data_chain/apps/base/task/worker/export_knowledge_base_worker.py create mode 100644 data_chain/apps/base/task/worker/generate_dataset_worker.py create mode 100644 data_chain/apps/base/task/worker/import_dataset_worker.py create mode 100644 data_chain/apps/base/task/worker/import_knowledge_base_worker.py create mode 100644 data_chain/apps/base/task/worker/parse_document_worker.py rename data_chain/apps/base/{document => }/zip_handler.py (57%) create mode 100644 data_chain/apps/router/acc_testing.py create mode 100644 data_chain/apps/router/dataset.py delete mode 100644 data_chain/apps/router/model.py create mode 100644 data_chain/apps/router/role.py create mode 100644 data_chain/apps/router/task.py create mode 100644 data_chain/apps/router/team.py create mode 100644 data_chain/apps/router/usr_message.py create mode 100644 data_chain/apps/service/acc_testing_service.py create mode 100644 data_chain/apps/service/dataset_service.py create mode 100644 data_chain/apps/service/router_service.py create mode 100644 data_chain/apps/service/session_service.py create mode 100644 data_chain/apps/service/task_queue_service.py create mode 100644 data_chain/apps/service/team_service.py delete mode 100644 data_chain/apps/service/user_service.py rename data_chain/{apps/service/embedding_service.py => embedding/embedding.py} (68%) create mode 100644 data_chain/entities/common.py create 
mode 100644 data_chain/entities/enum.py create mode 100644 data_chain/entities/request_data.py create mode 100644 data_chain/entities/response_data.py delete mode 100644 data_chain/exceptions/err_code.py delete mode 100644 data_chain/exceptions/exception.py create mode 100644 data_chain/llm/icon/ollama.svg rename data_chain/{apps/base/model => llm}/llm.py (68%) create mode 100644 data_chain/manager/dataset_manager.py create mode 100644 data_chain/manager/qa_manager.py create mode 100644 data_chain/manager/role_manager.py create mode 100644 data_chain/manager/session_manager.py create mode 100644 data_chain/manager/task_queue_mamanger.py create mode 100644 data_chain/manager/task_report_manager.py create mode 100644 data_chain/manager/team_manager.py create mode 100644 data_chain/manager/testcase_manager.py create mode 100644 data_chain/manager/testing_manager.py create mode 100644 data_chain/manager/touch delete mode 100644 data_chain/manager/vector_items_manager.py delete mode 100644 data_chain/models/api.py delete mode 100644 data_chain/models/constant.py delete mode 100644 data_chain/models/service.py create mode 100644 data_chain/parser/handler/json_parser.py create mode 100644 data_chain/parser/handler/md_zip_parser.py create mode 100644 data_chain/parser/handler/yaml_parser.py create mode 100644 data_chain/parser/parse_result.py delete mode 100644 data_chain/parser/service/parser_service.py create mode 100644 data_chain/parser/tools/image_tool.py delete mode 100644 data_chain/parser/tools/ocr.py create mode 100644 data_chain/parser/tools/ocr_tool.py delete mode 100644 data_chain/parser/tools/split.py create mode 100644 data_chain/parser/tools/token_tool.py create mode 100644 data_chain/rag/base_searcher.py create mode 100644 data_chain/stores/database/database.py create mode 100644 data_chain/stores/mongodb/mongodb.py delete mode 100644 data_chain/stores/postgres/postgres.py create mode 100644 example.html create mode 100644 example.md create mode 100644 
example.zip create mode 100644 fastapi_openapi.yaml create mode 100644 openapi.json create mode 100644 temp/example.md create mode 100644 test.html diff --git a/data_chain/apps/app.py b/data_chain/apps/app.py index 7a610c2..2df1ccc 100644 --- a/data_chain/apps/app.py +++ b/data_chain/apps/app.py @@ -1,152 +1,91 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import uuid -from data_chain.logger.logger import logger as logging -from sqlalchemy import select, update, or_,and_ -import fastapi -import uvicorn -from fastapi.responses import JSONResponse -from asgi_correlation_id import CorrelationIdMiddleware +from typing import Annotated +from fastapi import APIRouter, Depends, Query, Body from apscheduler.schedulers.asyncio import AsyncIOScheduler - -from data_chain.config.config import config -from data_chain.apps.router import chunk,document,health_check,knowledge_base,model,other,user -from data_chain.apps.service.task_service import task_queue_handler, monitor_tasks -from data_chain.apps.service.user_service import UserHTTPException -from data_chain.stores.postgres.postgres import PostgresDB, DocumentTypeEntity, DocumentEntity, KnowledgeBaseEntity, TaskEntity,User -from data_chain.models.constant import DocumentEmbeddingConstant, KnowledgeStatusEnum, TaskConstant -from data_chain.apps.base.task.task_handler import TaskRedisHandler +import uvicorn +import fastapi +from data_chain.entities.common import actions, DEFAULt_DOC_TYPE_ID +from data_chain.apps.router import ( + team, + knowledge_base, + document, + chunk, + dataset, + acc_testing, + health_check, + other, + role, + usr_message, + task +) +from data_chain.stores.database.database import DataBase, ActionEntity, DocumentTypeEntity +from data_chain.manager.role_manager import RoleManager +from data_chain.manager.document_type_manager import DocumentTypeManager # 关闭APScheduler的运行日志 # logging.getLogger('apscheduler').setLevel(logging.ERROR) - +from 
data_chain.apps.service.router_service import get_route_info +from data_chain.apps.service.task_queue_service import TaskQueueService app = fastapi.FastAPI(docs_url=None, redoc_url=None) -app.add_middleware(CorrelationIdMiddleware) scheduler = AsyncIOScheduler() -@app.exception_handler(UserHTTPException) -async def user_exception_handler(request, exc: UserHTTPException): - return JSONResponse( - status_code=exc.status_code, - content={"retcode": exc.retcode, "retmsg": exc.rtmsg, "data": exc.data}, - ) @app.on_event("startup") async def startup_event(): await configure() - await init_database() - TaskRedisHandler.clear_all_task(config['REDIS_PENDING_TASK_QUEUE_NAME']) - TaskRedisHandler.clear_all_task(config['REDIS_SUCCESS_TASK_QUEUE_NAME']) - TaskRedisHandler.clear_all_task(config['REDIS_RESTART_TASK_QUEUE_NAME']) - scheduler.add_job(task_queue_handler, 'interval', seconds=5) - scheduler.add_job(monitor_tasks, 'interval', seconds=25) - scheduler.start() - logging.info("Application startup complete.") - - -async def set_non_pending_documents_to_pending(): - async with await PostgresDB().get_session() as session: - # 构建更新语句 - update_stmt = ( - update(DocumentEntity).where( - or_( - DocumentEntity.status != DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING, DocumentEntity. 
- status != DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING)).values( - status=DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING)) - - # 执行更新 - await session.execute(update_stmt) - await session.commit() - - -async def set_non_idle_knowledge_bases_to_idle(): - async with await PostgresDB().get_session() as session: - # 构建更新语句 - update_stmt = ( - update(KnowledgeBaseEntity) - .where(and_(KnowledgeBaseEntity.status != KnowledgeStatusEnum.IDLE)) - .values(status=KnowledgeStatusEnum.IDLE) + await DataBase.init_all_table() + await add_acitons() + await TaskQueueService.init_task_queue() + scheduler.add_job(TaskQueueService.handle_tasks, 'interval', seconds=5) + + +async def add_acitons(): + for action in actions: + action_entity = ActionEntity( + action=action['action'], + name=action['name'], + type=action['type'], ) + await RoleManager.add_action(action_entity) - # 执行更新 - await session.execute(update_stmt) - await session.commit() - -async def set_non_canceled_task_to_canceled(): - async with await PostgresDB().get_session() as session: - # 构建更新语句 - update_stmt = ( - update(TaskEntity) - .where(or_(TaskEntity.status == TaskConstant.TASK_STATUS_RUNNING, - TaskEntity.status == TaskConstant.TASK_STATUS_PENDING)) - .values(status=TaskConstant.TASK_STATUS_CANCELED) - ) - - # 执行更新 - await session.execute(update_stmt) - await session.commit() - -async def add_default_user(): - async with await PostgresDB().get_session() as session: - user_entity=User(account=config['DEFAULT_USER_ACCOUNT'],passwd=config['DEFAULT_USER_PASSWD'],name=config['DEFAULT_USER_NAME'],language=config['DEFAULT_USER_LANGUAGE']) - stmt = select(User).where(User.account==config['DEFAULT_USER_ACCOUNT']) - result = await session.execute(stmt) - existing_type = result.scalars().first() - if not existing_type: - session.add(user_entity) - await session.commit() - logging.info("Default user added.") - else: - logging.info("Default user exists.") - -async def init_database(): - await 
PostgresDB.init_all_table() - zero_uuid = uuid.UUID('00000000-0000-0000-0000-000000000000') - document_type = DocumentTypeEntity(id=zero_uuid, kb_id=None, type='default type') - async with await PostgresDB().get_session() as session: - # 使用异步查询 - stmt = select(DocumentTypeEntity).where(DocumentTypeEntity.id == zero_uuid) - result = await session.execute(stmt) - existing_type = result.scalars().first() - - if not existing_type: - session.add(document_type) - await session.commit() - logging.info("Default document type added.") - else: - logging.info("Default document type exists.") - await add_default_user() - await set_non_canceled_task_to_canceled() - await set_non_pending_documents_to_pending() - await set_non_idle_knowledge_bases_to_idle() +async def add_document_type(): + DocumentTypeEntity( + id=DEFAULt_DOC_TYPE_ID, + name="default", + ) + await DocumentTypeManager.add_document_type(document_type_entity) async def configure(): + app.include_router(team.router) + app.include_router(knowledge_base.router) app.include_router(chunk.router) app.include_router(document.router) app.include_router(health_check.router) - app.include_router(knowledge_base.router) - app.include_router(model.router) + app.include_router(dataset.router) app.include_router(other.router) - app.include_router(user.router) + app.include_router(acc_testing.router) + app.include_router(role.router) + app.include_router(usr_message.router) + app.include_router(task.router) +# 定义一个路由来获取所有路由信息 + + +@app.get("/routes") +def get_all_routes(action: Annotated[str, Depends(get_route_info)]): + print(action) + route_info = [] + for route in app.routes: + if hasattr(route, "methods") and hasattr(route, "path"): + route_info.append( + list(route.methods)[0]+' '+route.path + ) + return route_info def main(): - try: - ssl_enable = config["SSL_ENABLE"] - if ssl_enable: - uvicorn.run(app, host=config["UVICORN_IP"], port=int(config["UVICORN_PORT"]), - proxy_headers=True, forwarded_allow_ips='*', - 
ssl_certfile=config["SSL_CERTFILE"], - ssl_keyfile=config["SSL_KEYFILE"], - ) - else: - uvicorn.run(app, host=config["UVICORN_IP"], port=int(config["UVICORN_PORT"]), - proxy_headers=True, forwarded_allow_ips='*' - ) - except Exception as e: - logging.error(f"Error running the app: {e}") - exit(1) + uvicorn.run(app, host='0.0.0.0', port=3002) if __name__ == '__main__': diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py new file mode 100644 index 0000000..5065766 --- /dev/null +++ b/data_chain/apps/base/convertor.py @@ -0,0 +1,627 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from typing import Any, Dict +import base64 +import hashlib +import json +import uuid +from data_chain.entities.request_data import ( + CreateTeamRequest, + UpdateKnowledgeBaseRequest, + DocumentType as DocumentTypeRequest, + CreateKnowledgeBaseRequest, + UpdateDocumentRequest, + UpdateChunkRequest, + CreateDatasetRequest, + CreateTestingRequest, + UpdateTestingRequest +) +from data_chain.entities.response_data import ( + User, + Team, + Knowledgebase, + DocumentType as DocumentTypeResponse, + Document, + Chunk, + LLM, + Dataset, + Data, + Testing, + TestCase, + Task +) + +from data_chain.entities.enum import ( + UserStatus, + TeamStatus, + TaskType, + TaskStatus, + KnowledgeBaseStatus, + DocumentStatus, + ChunkType, + SearchMethod, + TestingStatus, + TestCaseStatus +) +from data_chain.entities.common import default_roles +from data_chain.stores.database.database import ( + UserEntity, + TeamEntity, + KnowledgeBaseEntity, + DocumentTypeEntity, + DocumentEntity, + ChunkEntity, + DataSetEntity, + DataSetDocEntity, + QAEntity, + TestingEntity, + TestCaseEntity, + TaskEntity, + TaskReportEntity, + TeamUserEntity, + UserRoleEntity, + RoleEntity, + RoleActionEntity +) +from data_chain.config.config import config +from data_chain.logger.logger import logger as logging + + +class Convertor: + """数据转换器""" + + @staticmethod + async def 
convert_request_to_dict(req: Any) -> dict: + """将请求转换为字典""" + try: + req_dict = req.dict(exclude_none=True) + return req_dict + except Exception as e: + err = "请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_user_sub_to_user_entity(user_sub: str) -> UserEntity: + """将用户ID转换为用户实体""" + try: + user_entity = UserEntity(id=user_sub, name=user_sub, status=UserStatus.ACTIVE) + return user_entity + except Exception as e: + err = "用户ID转换为用户实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_user_entity_to_user(user_entity: UserEntity) -> User: + """将用户实体转换为用户""" + try: + user = User(id=user_entity.id, name=user_entity.name) + return user + except Exception as e: + err = "用户实体转换为用户失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_create_team_request_to_team_entity( + user_sub: str, req: CreateTeamRequest) -> TeamEntity: + """将创建团队请求转换为团队实体""" + try: + team_entity = TeamEntity( + author_id=user_sub, + author_name=user_sub, + name=req.team_name, + description=req.description, + member_cnt=1, + is_public=req.is_public, + status=TeamStatus.EXISTED + ) + return team_entity + except Exception as e: + err = "创建团队请求转换为团队实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_update_team_request_to_dict( + req: CreateTeamRequest) -> dict: + """将更新团队请求转换为字典""" + try: + + req_dict = { + 'name': req.team_name, + 'description': req.description, + 'is_public': req.is_public + } + return req_dict + except Exception as e: + err = "更新团队请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_team_entity_to_team(team_entity: TeamEntity) -> Team: + """将团队实体转换为团队""" + try: + team = Team( + teamId=team_entity.id, + teamName=team_entity.name, + description=team_entity.description, + authorName=team_entity.author_name, + memberCount=team_entity.member_cnt, + 
isPublic=team_entity.is_public, + createdTime=team_entity.created_time.strftime('%Y-%m-%d %H:%M') + ) + return team + except Exception as e: + err = "团队实体转换为团队失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_user_sub_and_team_id_to_team_user_entity( + user_sub: str, team_id: uuid.UUID) -> TeamUserEntity: + """将用户ID和团队ID转换为团队用户实体""" + try: + team_user_entity = TeamUserEntity( + user_id=user_sub, + team_id=team_id + ) + return team_user_entity + except Exception as e: + err = "用户ID和团队ID转换为团队用户实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_default_role_dict_to_role_entity( + team_id: uuid.UUID, default_role_dict: Dict[str, Any]) -> RoleEntity: + """将默认角色字典转换为角色实体""" + try: + role_entity = RoleEntity( + team_id=team_id, + name=default_role_dict["name"], + is_unique=default_role_dict["is_unique"], + editable=default_role_dict["editable"], + ) + return role_entity + except Exception as e: + err = "默认角色字典转换为角色实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_default_role_action_dicts_to_role_action_entities( + role_id: uuid.UUID, + default_role_action_dicts: list[Dict[str, Any]]) -> list[RoleActionEntity]: + """将默认角色操作字典转换为角色操作实体""" + try: + role_action_entities = [] + for default_role_action_dict in default_role_action_dicts: + role_action_entity = RoleActionEntity( + role_id=role_id, + action=default_role_action_dict["action"], + ) + role_action_entities.append(role_action_entity) + return role_action_entities + except Exception as e: + err = "默认角色操作字典转换为角色操作实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_user_sub_role_id_and_team_id_to_user_role_entity( + user_sub: str, role_id: uuid.UUID, team_id: uuid.UUID) -> UserRoleEntity: + """将用户ID、角色ID和团队ID转换为用户角色实体""" + try: + user_role_entity = UserRoleEntity( + user_id=user_sub, + role_id=role_id, + team_id=team_id + ) + return 
user_role_entity + except Exception as e: + err = "用户ID、角色ID和团队ID转换为用户角色实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_update_knowledge_base_request_to_dict( + req: UpdateKnowledgeBaseRequest) -> dict: + """将更新知识库请求转换为字典""" + try: + req_dict = { + 'name': req.kb_name, + 'description': req.description, + 'tokenizer': req.tokenizer.value, + 'upload_count_limit': req.upload_count_limit, + 'upload_size_limit': req.upload_size_limit, + 'default_parse_method': req.default_parse_method.value, + 'default_chunk_size': req.default_chunk_size + } + return req_dict + except Exception as e: + err = "更新知识库请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_knowledge_base_entity_to_knowledge_base( + knowledge_base_entity: KnowledgeBaseEntity, doc_type_entities: list[DocumentTypeEntity]) -> Knowledgebase: + """将知识库实体转换为知识库""" + try: + doc_types = [] + for doc_type_entity in doc_type_entities: + doc_type = await Convertor.convert_document_type_entity_to_document_type_response(doc_type_entity) + doc_types.append(doc_type) + knowledge_base = Knowledgebase( + kbId=knowledge_base_entity.id, + kbName=knowledge_base_entity.name, + authorName=knowledge_base_entity.author_name, + tokenizer=knowledge_base_entity.tokenizer, + embeddingModel=knowledge_base_entity.embedding_model, + description=knowledge_base_entity.description, + docCnt=knowledge_base_entity.doc_cnt, + docSize=knowledge_base_entity.doc_size, + uploadCountLimit=knowledge_base_entity.upload_count_limit, + uploadSizeLimit=knowledge_base_entity.upload_size_limit, + defaultParseMethod=knowledge_base_entity.default_parse_method, + defaultChunkSize=knowledge_base_entity.default_chunk_size, + createdTime=knowledge_base_entity.created_time.strftime('%Y-%m-%d %H:%M'), + docTypes=doc_types, + ) + return knowledge_base + except Exception as e: + err = "知识库实体转换为知识库失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + 
async def convert_document_type_entity_to_document_type_response( + document_type_entity: DocumentTypeEntity) -> DocumentTypeResponse: + """将文档类型实体转换为文档类型""" + try: + document_type = DocumentTypeResponse( + docTypeId=document_type_entity.id, + docTypeName=document_type_entity.name + ) + return document_type + except Exception as e: + err = "文档类型实体转换为文档类型失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_create_knowledge_base_request_to_knowledge_base_entity( + user_sub: str, team_id: uuid.UUID, req: CreateKnowledgeBaseRequest) -> KnowledgeBaseEntity: + """将创建知识库请求转换为知识库实体""" + try: + knowledge_base_entity = KnowledgeBaseEntity( + team_id=team_id, + author_id=user_sub, + author_name=user_sub, + name=req.kb_name, + tokenizer=req.tokenizer.value, + description=req.description, + embedding_model=req.embedding_model, + upload_count_limit=req.upload_count_limit, + upload_size_limit=req.upload_size_limit, + default_parse_method=req.default_parse_method.value, + default_chunk_size=req.default_chunk_size, + ) + return knowledge_base_entity + except Exception as e: + err = "创建知识库请求转换为知识库实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_kb_id_and_requeset_document_type_to_document_type_entity( + kb_id: uuid.UUID, document_type: DocumentTypeRequest) -> DocumentTypeEntity: + """将知识库ID和文档类型转换为文档类型实体""" + try: + document_type_entity = DocumentTypeEntity( + id=document_type.doc_type_id, + kb_id=kb_id, + name=document_type.doc_type_name + ) + return document_type_entity + except Exception as e: + err = "知识库ID和文档类型转换为文档类型实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_document_entity_and_document_type_entity_to_document( + document_entity: DocumentEntity, document_type_entity: DocumentTypeEntity) -> Document: + """将文档实体和文档类型实体转换为文档""" + try: + document_type_response = await Convertor.convert_document_type_entity_to_document_type_response( + 
document_type_entity + ) + document = Document( + docId=document_entity.id, + docName=document_entity.name, + docType=document_type_response, + chunkSize=document_entity.chunk_size, + createdTime=document_entity.created_time.strftime('%Y-%m-%d %H:%M'), + parseMethod=document_entity.parse_method, + enabled=document_entity.enabled, + authorName=document_entity.author_name, + status=DocumentStatus(document_entity.status), + ) + return document + except Exception as e: + err = "文档实体和文档类型实体转换为文档失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_task_entity_to_task( + task_entity: TaskEntity, task_report: TaskReportEntity = None) -> Task: + """将任务实体和任务报告实体转换为任务""" + try: + task_completed = 0 + if task_report is not None: + task_completed = task_report.current_stage/task_report.stage_cnt + task = Task( + opId=task_entity.op_id, + opName=task_entity.op_name, + taskId=task_entity.id, + taskStatus=TaskStatus(task_entity.status), + taskType=TaskType(task_entity.type), + taskCompleted=task_completed, + createdTime=task_entity.created_time.strftime('%Y-%m-%d %H:%M') + ) + return task + except Exception as e: + err = "任务实体和任务报告实体转换为任务失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_update_document_request_to_dict( + req: UpdateDocumentRequest) -> dict: + """将更新文档请求转换为字典""" + try: + req_dict = { + 'name': req.doc_name, + 'type_id': req.doc_type_id, + 'parse_method': req.parse_method.value, + 'chunk_size': req.chunk_size, + 'enabled': req.enabled + } + return req_dict + except Exception as e: + err = "更新文档请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_chunk_entity_to_chunk( + chunk_entity: ChunkEntity) -> Chunk: + """将chunk实体转换为chunk""" + try: + chunk = Chunk( + chunk_id=chunk_entity.id, + chunk_type=ChunkType(chunk_entity.type), + text=chunk_entity.text + ) + return chunk + except Exception as e: + err = "chunk实体转换为chunk失败" + 
logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_update_chunk_request_to_dict( + req: UpdateChunkRequest) -> dict: + """将更新chunk请求转换为字典""" + try: + req_dict = { + 'text': req.text, + 'enabled': req.enabled + } + return req_dict + except Exception as e: + err = "更新chunk请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_dataset_entity_to_dataset( + dataset_entity: DataSetEntity) -> Dataset: + """将数据集实体转换为数据集""" + try: + dataset = Dataset( + datasetId=dataset_entity.id, + datasetName=dataset_entity.name, + description=dataset_entity.description, + dataCnt=dataset_entity.data_cnt, + isDataCleared=dataset_entity.is_data_cleared, + isChunkRelated=dataset_entity.is_chunk_related, + score=dataset_entity.score, + authorName=dataset_entity.author_name, + status=dataset_entity.status, + ) + return dataset + except Exception as e: + err = "数据集实体转换为数据集失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_llm_config_to_llm() -> LLM: + try: + with open('./data_chain/llm/icon/ollama.svg', 'r', encoding='utf-8') as file: + svg_content = file.read() + svg_bytes = svg_content.encode('utf-8') + base64_bytes = base64.b64encode(svg_bytes) + base64_string = base64_bytes.decode('utf-8') + config_params = { + 'MODEL_NAME': config['MODEL_NAME'], + 'OPENAI_API_BASE': config['OPENAI_API_BASE'], + 'OPENAI_API_KEY': config['OPENAI_API_KEY'], + 'REQUEST_TIMEOUT': config['REQUEST_TIMEOUT'], + 'MAX_TOKENS': config['MAX_TOKENS'], + 'TEMPERATURE': config['TEMPERATURE'] + } + config_json = json.dumps(config_params, sort_keys=True, ensure_ascii=False).encode('utf-8') + hash_object = hashlib.sha256(config_json) + hash_hex = hash_object.hexdigest() + llm = LLM( + llmId=hash_hex, + llmName=config['MODEL_NAME'], + llmIcon=base64_string, + ) + return llm + except Exception as e: + err = "llm配置转换为llm失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + 
async def convert_qa_entity_to_data( + qa_entity: QAEntity) -> Data: + """将QA实体转换为数据""" + try: + data = Data( + dataId=qa_entity.id, + docName=qa_entity.doc_name, + question=qa_entity.question, + answer=qa_entity.answer, + chunk=qa_entity.chunk + ) + return data + except Exception as e: + err = "QA实体转换为数据失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_create_dataset_request_to_dataset_entity( + user_sub: str, team_id: str, req: CreateDatasetRequest) -> DataSetEntity: + """将创建数据集请求转换为数据集实体""" + try: + dataset_entity = DataSetEntity( + team_id=team_id, + author_id=user_sub, + author_name=user_sub, + kb_id=req.kb_id, + llm_id=req.llm_id, + name=req.dataset_name, + description=req.description, + is_data_cleared=req.is_data_cleared, + is_chunk_related=req.is_chunk_related, + ) + return dataset_entity + except Exception as e: + err = "创建数据集请求转换为数据集实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_dataset_id_and_doc_id_to_dataset_doc_entity( + dataset_id: uuid.UUID, doc_id: uuid.UUID) -> DataSetDocEntity: + """将数据集ID和文档ID转换为数据集文档实体""" + try: + dataset_doc_entity = DataSetDocEntity( + dataset_id=dataset_id, + doc_id=doc_id + ) + return dataset_doc_entity + except Exception as e: + err = "数据集ID和文档ID转换为数据集文档实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_testing_entity_to_testing(testing_entity: TestingEntity) -> Testing: + """将测试实体转换为测试""" + try: + testing = Testing( + testingId=testing_entity.id, + testingName=testing_entity.name, + description=testing_entity.description, + searchMethod=SearchMethod(testing_entity.search_method), + aveScore=testing_entity.ave_score, + avePre=testing_entity.ave_pre, + aveRec=testing_entity.ave_rec, + aveFai=testing_entity.ave_fai, + aveRel=testing_entity.ave_rel, + aveLcs=testing_entity.ave_lcs, + aveLeve=testing_entity.ave_leve, + aveJac=testing_entity.ave_jac, + 
authorName=testing_entity.author_name, + status=TestingStatus(testing_entity.status), + ) + return testing + except Exception as e: + err = "测试实体转换为测试失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_test_case_entity_to_test_case(test_case_entity: TestCaseEntity) -> TestCase: + """将测试用例实体转换为测试用例""" + try: + test_case = TestCase( + testCaseId=test_case_entity.id, + question=test_case_entity.question, + answer=test_case_entity.answer, + llmAnswer=test_case_entity.llm_answer, + relatedChunk=test_case_entity.related_chunk, + docName=test_case_entity.doc_name, + score=test_case_entity.score, + pre=test_case_entity.pre, + rec=test_case_entity.rec, + fai=test_case_entity.fai, + rel=test_case_entity.rel, + lcs=test_case_entity.lcs, + leve=test_case_entity.leve, + jac=test_case_entity.jac, + status=TestCaseStatus(test_case_entity.status), + ) + return test_case + except Exception as e: + err = "测试用例实体转换为测试用例失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_create_testing_request_to_testing_entity( + user_sub: str, team_id: uuid.UUID, kb_id: uuid.UUID, req: CreateTestingRequest) -> TestingEntity: + """将创建测试请求转换为测试实体""" + try: + testing_entity = TestingEntity( + team_id=team_id, + kb_id=kb_id, + author_id=user_sub, + author_name=user_sub, + dataset_id=req.dataset_id, + name=req.testing_name, + description=req.description, + llm_id=req.llm_id, + search_method=req.search_method.value, + ) + return testing_entity + except Exception as e: + err = "创建测试请求转换为测试实体失败" + logging.exception("[Convertor] %s", err) + raise e + + @staticmethod + async def convert_update_testing_request_to_dict(req: UpdateTestingRequest) -> dict: + """将更新测试请求转换为字典""" + try: + req_dict = { + 'name': req.testing_name, + 'description': req.description, + 'llm_id': req.llm_id, + 'search_method': req.search_method.value + } + return req_dict + except Exception as e: + err = "更新测试请求转换为字典失败" + logging.exception("[Convertor] 
%s", err) + raise e diff --git a/data_chain/apps/base/convertor/chunk_convertor.py b/data_chain/apps/base/convertor/chunk_convertor.py deleted file mode 100644 index 7a4a4a6..0000000 --- a/data_chain/apps/base/convertor/chunk_convertor.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from typing import Dict, List -from data_chain.models.service import ChunkDTO -from data_chain.stores.postgres.postgres import ChunkEntity - - -class ChunkConvertor(): - @staticmethod - def convert_entity_to_dto(chunk_entity: ChunkEntity) -> ChunkDTO: - return ChunkDTO( - id=str(chunk_entity.id), - text=chunk_entity.text, - enabled=chunk_entity.enabled, - type=chunk_entity.type.split('.')[1] - ) diff --git a/data_chain/apps/base/convertor/document_convertor.py b/data_chain/apps/base/convertor/document_convertor.py deleted file mode 100644 index a204a3f..0000000 --- a/data_chain/apps/base/convertor/document_convertor.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from data_chain.models.service import DocumentDTO, DocumentTypeDTO -from data_chain.stores.postgres.postgres import DocumentEntity, DocumentTypeEntity - - -class DocumentConvertor(): - - @staticmethod - def convert_entity_to_dto(document_entity: DocumentEntity, document_type_entity: DocumentTypeEntity - ) -> DocumentDTO: - document_type_dto = DocumentTypeDTO(id=str(document_type_entity.id), type=document_type_entity.type) - return DocumentDTO( - id=str(document_entity.id), - name=document_entity.name, - extension=document_entity.extension, - document_type=document_type_dto, - chunk_size=document_entity.chunk_size, - status=document_entity.status, - enabled=document_entity.enabled, - parser_method=document_entity.parser_method, - created_time=document_entity.created_time.strftime('%Y-%m-%d %H:%M') - ) diff --git a/data_chain/apps/base/convertor/knowledge_convertor.py b/data_chain/apps/base/convertor/knowledge_convertor.py deleted file mode 100644 index 313b68f..0000000 --- a/data_chain/apps/base/convertor/knowledge_convertor.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from typing import List -import uuid -from data_chain.models.service import DocumentTypeDTO, KnowledgeBaseDTO -from data_chain.stores.postgres.postgres import DocumentTypeEntity, KnowledgeBaseEntity -from data_chain.models.constant import KnowledgeStatusEnum - -class KnowledgeConvertor(): - - @staticmethod - def convert_dict_to_entity(tmp_dict:dict): - return KnowledgeBaseEntity( - name=tmp_dict['name'], - user_id=tmp_dict['user_id'], - language=tmp_dict['language'], - description=tmp_dict.get('description',''), - embedding_model=tmp_dict['embedding_model'], - document_number=0, - document_size=0, - vector_items_id=uuid.uuid4(), - status=KnowledgeStatusEnum.IDLE, - default_parser_method=tmp_dict['default_parser_method'], - default_chunk_size=tmp_dict['default_chunk_size']) - - @staticmethod - def convert_entity_to_dto(entity: KnowledgeBaseEntity, - document_type_entity_list: List[DocumentTypeEntity] - ) -> KnowledgeBaseDTO: - document_type_dto_list = [DocumentTypeDTO(id=str(document_type_entity.id), type=document_type_entity.type) - for document_type_entity in document_type_entity_list] - return KnowledgeBaseDTO( - id=str(entity.id), - name=entity.name, - language=entity.language, - description=entity.description, - embedding_model=entity.embedding_model, - default_parser_method=entity.default_parser_method, - default_chunk_size=entity.default_chunk_size, - document_count=entity.document_number, - status=entity.status, - document_size=entity.document_size, - document_type_list=document_type_dto_list, - created_time=entity.created_time.strftime('%Y-%m-%d %H:%M') - ) diff --git a/data_chain/apps/base/convertor/model_convertor.py b/data_chain/apps/base/convertor/model_convertor.py deleted file mode 100644 index 23a60e5..0000000 --- a/data_chain/apps/base/convertor/model_convertor.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from typing import Optional -import json -from data_chain.models.service import ModelDTO -from data_chain.stores.postgres.postgres import ModelEntity -from data_chain.apps.base.security.security import Security -from data_chain.config.config import ModelConfig - - -class ModelConvertor(): - @staticmethod - def convert_entity_to_dto(model_entity: Optional[ModelEntity] = None) -> ModelDTO: - if model_entity is None: - return ModelDTO() - return ModelDTO( - id=str(model_entity.id), - model_name=model_entity.model_name, - model_type=model_entity.model_type, - openai_api_base=model_entity.openai_api_base, - openai_api_key=Security.decrypt( - model_entity.encrypted_openai_api_key, - json.loads(model_entity.encrypted_config) - ), - max_tokens=model_entity.max_tokens, - is_online=model_entity.is_online - ) - - @staticmethod - def convert_config_to_entity(model_config: ModelConfig) -> ModelDTO: - if model_config is None: - return ModelEntity() - return ModelDTO( - id=str(model_config['MODEL_ID']), - model_name=model_config['MODEL_NAME'], - model_type=model_config['MODEL_TYPE'], - ) diff --git a/data_chain/apps/base/convertor/task_convertor.py b/data_chain/apps/base/convertor/task_convertor.py deleted file mode 100644 index f1f16d2..0000000 --- a/data_chain/apps/base/convertor/task_convertor.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from typing import Dict, List -from data_chain.models.service import TaskDTO, TaskReportDTO -from data_chain.stores.postgres.postgres import TaskEntity, TaskStatusReportEntity - - -class TaskConvertor(): - @staticmethod - def convert_entity_to_dto(task_entity: TaskEntity, TaskStatusReportEntityList: List[TaskStatusReportEntity] = None) -> TaskDTO: - reports = [] - for task_status_report_entity in TaskStatusReportEntityList: - reports.append( - TaskReportDTO( - id=task_status_report_entity.id, - message=task_status_report_entity.message, - current_stage=task_status_report_entity.current_stage, - stage_cnt=task_status_report_entity.stage_cnt, - create_time=task_status_report_entity.created_time.strftime('%Y-%m-%d %H:%M') - )) - return TaskDTO( - id=task_entity.id, - type=task_entity.type, - retry=task_entity.retry, - status=task_entity.status, - reports=reports, - create_time=task_entity.created_time.strftime('%Y-%m-%d %H:%M') - ) diff --git a/data_chain/apps/base/session/session.py b/data_chain/apps/base/session/session.py deleted file mode 100644 index 10e096f..0000000 --- a/data_chain/apps/base/session/session.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from __future__ import annotations - -import uuid -import base64 -import hashlib -import hmac -from data_chain.logger.logger import logger as logging -import secrets - -from data_chain.config.config import config -from data_chain.stores.redis.redis import RedisConnectionPool - - - - -class SessionManager: - def __init__(self): - raise NotImplementedError("SessionManager不可以被实例化") - - @staticmethod - def create_session(user_id: uuid) -> str: - session_id = secrets.token_hex(16) - with RedisConnectionPool.get_redis_connection() as r: - try: - data = {"session_id": session_id} - r.hmset(str(user_id), data) - r.expire(str(user_id), config["SESSION_TTL"] * 60) - data = {"user_id": str(user_id)} - r.hmset(session_id, data) - r.expire(session_id, config["SESSION_TTL"] * 60) - except Exception as e: - logging.error(f"Session error: {e}") - return session_id - - @staticmethod - def delete_session(user_id: uuid) -> bool: - with RedisConnectionPool.get_redis_connection() as r: - try: - old_session_id=None - if r.hexists(str(user_id), "session_id"): - old_session_id=r.hget(str(user_id), "session_id") - r.hdel(str(user_id), "session_id") - if old_session_id and r.hexists(old_session_id, "user_id"): - r.hdel(old_session_id, "user_id") - if old_session_id and r.hexists(old_session_id, "nonce"): - r.hdel(old_session_id, "nonce") - except Exception as e: - logging.error(f"Delete session error: {e}") - return False - - @staticmethod - def verify_user(session_id: str) -> bool: - with RedisConnectionPool.get_redis_connection() as r: - try: - user_exist = r.hexists(session_id, "user_id") - r.expire(session_id, config["SESSION_TTL"] * 60) - return user_exist - except Exception as e: - logging.error(f"User not in session: {e}") - return False - - @staticmethod - def get_user_id(session_id: str) -> str | None: - - with RedisConnectionPool.get_redis_connection() as r: - try: - user_id = r.hget(session_id, "user_id") - r.expire(session_id, config["SESSION_TTL"] * 60) - except Exception as 
e: - logging.error(f"Get user from session error: {e}") - return None - - return uuid.UUID(user_id) - - @staticmethod - def create_csrf_token(session_id: str) -> str | None: - rand = secrets.token_hex(8) - - with RedisConnectionPool.get_redis_connection() as r: - try: - r.hset(session_id, "nonce", rand) - r.expire(session_id, config["SESSION_TTL"] * 60) - except Exception as e: - logging.error(f"Create csrf token from session error: {e}") - return None - - csrf_value = f"{session_id}{rand}" - csrf_b64 = base64.b64encode(bytes.fromhex(csrf_value)) - - hmac_processor = hmac.new(key=base64.b64decode(config["CSRF_KEY"]), msg=csrf_b64, digestmod=hashlib.sha256) - signature = base64.b64encode(hmac_processor.digest()) - - csrf_b64 = csrf_b64.decode("utf-8") - signature = signature.decode("utf-8") - return f"{csrf_b64}.{signature}" - - @staticmethod - def verify_csrf_token(session_id: str, token: str) -> bool: - if not token: - return False - - token_msg = token.split(".") - if len(token_msg) != 2: - return False - - first_part = base64.b64decode(token_msg[0]).hex() - current_session_id = first_part[:32] - logging.error(f"current_session_id: {current_session_id}, session_id: {session_id}") - if current_session_id != session_id: - return False - - current_nonce = first_part[32:] - with RedisConnectionPool.get_redis_connection() as r: - try: - nonce = r.hget(current_session_id, "nonce") - if nonce != current_nonce: - return False - r.expire(current_session_id, config["SESSION_TTL"] * 60) - except Exception as e: - logging.error(f"Get csrf token from session error: {e}") - - hmac_obj = hmac.new(key=base64.b64decode(config["CSRF_KEY"]), - msg=token_msg[0].encode("utf-8"), digestmod=hashlib.sha256) - signature = hmac_obj.digest() - current_signature = base64.b64decode(token_msg[1]) - - return hmac.compare_digest(signature, current_signature) diff --git a/data_chain/apps/base/task/document_task_handler.py b/data_chain/apps/base/task/document_task_handler.py deleted file mode 
100644 index eedbdd6..0000000 --- a/data_chain/apps/base/task/document_task_handler.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import os -import secrets -import uuid -import shutil -from data_chain.logger.logger import logger as logging -from fastapi import File, UploadFile -import aiofiles -from typing import List -from data_chain.apps.base.task.task_handler import TaskRedisHandler -from data_chain.manager.document_manager import DocumentManager, TemporaryDocumentManager -from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.task_manager import TaskManager, TaskStatusReportManager -from data_chain.models.constant import DocumentEmbeddingConstant, OssConstant, TaskConstant -from data_chain.parser.service.parser_service import ParserService -from data_chain.stores.minio.minio import MinIO -from data_chain.stores.postgres.postgres import DocumentEntity, TaskEntity, TaskStatusReportEntity -from data_chain.config.config import config - - -class DocumentTaskHandler(): - - @staticmethod - async def save_document_file_to_local(target_dir: str, file: UploadFile): - document_file_path = os.path.join(target_dir, file.filename) - async with aiofiles.open(document_file_path, "wb") as f: - content = await file.read() - await f.write(content) - return document_file_path - - @staticmethod - async def handle_parser_document_task(t_id: uuid.UUID): - target_dir = None - try: - task_entity = await TaskManager.select_by_id(t_id) - if task_entity.status != TaskConstant.TASK_STATUS_RUNNING: - TaskRedisHandler.put_task_by_tail(config['REDIS_SILENT_ERROR_TASK_QUEUE_NAME'], str(task_entity.id)) - return - # 文件解析主入口 - - # 下载文件 - document_entity = await DocumentManager.select_by_id(task_entity.op_id) - target_dir = os.path.join(OssConstant.PARSER_SAVE_FOLDER, str(document_entity.id), secrets.token_hex(16)) - if not os.path.exists(target_dir): - os.makedirs(target_dir) - 
file_extension = document_entity.extension.lower() - file_path = target_dir + '/' + str(document_entity.id) + file_extension - await MinIO.download_object(OssConstant.MINIO_BUCKET_DOCUMENT, str(document_entity.id), - file_path) - - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'document {document_entity.name} begin to parser', - current_stage=1, - stage_cnt=7 - )) - - parser = ParserService() - answer = await parser.parser(document_entity.id, file_path) - chunk_list, chunk_link_list, images = answer['chunk_list'], answer['chunk_link_list'], answer[ - 'image_chunks'] - new_chunk_list=[] - for chunk in chunk_list: - if len(chunk['text'].strip())!=0: - new_chunk_list.append(chunk) - chunk_list = new_chunk_list - chunk_id_set = set() - for chunk in chunk_list: - chunk_id_set.add(chunk['id']) - new_chunk_link_list = [] - for chunk_link in chunk_link_list: - if chunk_link['chunk_a'] in chunk_id_set: - new_chunk_link_list.append(chunk_link) - chunk_link_list = new_chunk_link_list - new_images = [] - for image in images: - if image['chunk_id'] in chunk_id_set: - new_images.append(image) - images = new_images - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse document {document_entity.name} completed, waiting for uploading', - current_stage=2, - stage_cnt=7 - )) - full_text = '' - for chunk in chunk_list: - full_text += chunk['text'] - await parser.upload_full_text_to_database(document_entity.id, full_text) - await parser.upload_chunks_to_database(chunk_list) - await parser.upload_chunk_links_to_database(chunk_link_list) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload document {document_entity.name} full text chunk and link completed', - current_stage=3, - stage_cnt=7 - )) - - await parser.upload_images_to_minio(images) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - 
message=f'Upload document {document_entity.name} images to minio completed', - current_stage=4, - stage_cnt=7 - )) - await parser.upload_images_to_database(images) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload document {document_entity.name} images to pg completed', - current_stage=5, - stage_cnt=7 - )) - - vectors = await parser.embedding_chunks(chunk_list) - await parser.upload_vectors_to_database(vectors) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload document {document_entity.name} vectors completed', - current_stage=6, - stage_cnt=7 - )) - - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse document {task_entity.id} succcessfully', - current_stage=7, - stage_cnt=7 - )) - await DocumentManager.update(task_entity.op_id, {'status': DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING}) - await TaskManager.update(task_entity.id, {'status': TaskConstant.TASK_STATUS_SUCCESS}) - TaskRedisHandler.put_task_by_tail(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_entity.id)) - except Exception as e: - import traceback - print(traceback.format_exc()) - TaskRedisHandler.put_task_by_tail(config['REDIS_RESTART_TASK_QUEUE_NAME'], str(task_entity.id)) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse document {task_entity.id} failed due to {e}', - current_stage=7, - stage_cnt=7 - )) - finally: - if target_dir and os.path.exists(target_dir): - shutil.rmtree(target_dir) # 清理文件 - - @staticmethod - async def handle_parser_temporary_document_task(t_id: uuid.UUID): - target_dir = None - try: - task_entity = await TaskManager.select_by_id(t_id) - if task_entity.status != TaskConstant.TASK_STATUS_RUNNING: - TaskRedisHandler.put_task_by_tail(config['REDIS_SILENT_ERROR_TASK_QUEUE_NAME'], str(task_entity.id)) - return - # 文件解析主入口 - - 
document_entity = await TemporaryDocumentManager.select_by_id(task_entity.op_id) - target_dir = os.path.join(OssConstant.PARSER_SAVE_FOLDER, str(document_entity.id), secrets.token_hex(16)) - if not os.path.exists(target_dir): - os.makedirs(target_dir) - file_extension = document_entity.extension.lower() - file_path = target_dir + '/' + str(document_entity.id) + file_extension - await MinIO.download_object(document_entity.bucket_name, str(document_entity.id), - file_path) - - await TaskStatusReportManager.insert( - TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Temporary document {document_entity.name} begin to parse', - current_stage=1, - stage_cnt=6 - )) - - parser = ParserService() - parser_result = await parser.parser(document_entity.id, file_path, is_temporary_document=True) - chunk_list, chunk_link_list, images = parser_result['chunk_list'], parser_result['chunk_link_list'], parser_result[ - 'image_chunks'] - new_chunk_list=[] - for chunk in chunk_list: - if len(chunk['text'].strip())!=0: - new_chunk_list.append(chunk) - chunk_list = new_chunk_list - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse temporary document {document_entity.name} completed, waiting for uploading', - current_stage=2, - stage_cnt=6 - )) - full_text = '' - for chunk in chunk_list: - full_text += chunk['text'] - await parser.upload_full_text_to_database(document_entity.id, full_text, is_temporary_document=True) - await parser.upload_chunks_to_database(chunk_list, is_temporary_document=True) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload temporary document {document_entity.name} full text chunks completed', - current_stage=3, - stage_cnt=6 - )) - await parser.upload_images_to_minio(images, is_temporary_document=True) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload temporary document 
{document_entity.name} images completed', - current_stage=4, - stage_cnt=6 - )) - vectors = await parser.embedding_chunks(chunk_list, is_temporary_document=True) - await parser.upload_vectors_to_database(vectors, is_temporary_document=True) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Upload temporary document {document_entity.name} vectors completed', - current_stage=5, - stage_cnt=6 - )) - - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse temporary document {task_entity.id} succcessfully', - current_stage=6, - stage_cnt=6 - )) - await TaskManager.update(task_entity.id, {'status': TaskConstant.TASK_STATUS_SUCCESS}) - TaskRedisHandler.put_task_by_tail(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_entity.id)) - except Exception as e: - TaskRedisHandler.put_task_by_tail(config['REDIS_RESTART_TASK_QUEUE_NAME'], str(task_entity.id)) - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Parse temporary document {task_entity.id} failed due to {e}', - current_stage=5, - stage_cnt=5 - )) - finally: - if target_dir and os.path.exists(target_dir): - shutil.rmtree(target_dir) # 清理文件 diff --git a/data_chain/apps/base/task/knowledge_base_task_handler.py b/data_chain/apps/base/task/knowledge_base_task_handler.py deleted file mode 100644 index 42bd77b..0000000 --- a/data_chain/apps/base/task/knowledge_base_task_handler.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-import os -import secrets -import shutil -import uuid -from data_chain.logger.logger import logger as logging - -from data_chain.models.constant import KnowledgeStatusEnum,ParseMethodEnum -from data_chain.apps.base.document.zip_handler import ZipHandler -from data_chain.apps.base.task.task_handler import TaskRedisHandler -from data_chain.manager.document_manager import DocumentManager -from data_chain.manager.document_type_manager import DocumentTypeManager -from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.task_manager import TaskManager -from data_chain.manager.task_manager import TaskStatusReportManager -from data_chain.manager.user_manager import UserManager -from data_chain.models.constant import DocumentEmbeddingConstant, OssConstant, TaskConstant -from data_chain.stores.minio.minio import MinIO -from data_chain.stores.postgres.postgres import DocumentEntity, KnowledgeBaseEntity, TaskEntity, TaskStatusReportEntity -from data_chain.config.config import config -import yaml - - - - - -class KnowledgeBaseTaskHandler(): - @staticmethod - async def parse_knowledge_base_document_yaml_file( - knowledge_base_entity: KnowledgeBaseEntity, unzip_folder_path: str): - document_path = os.path.join(unzip_folder_path, "document") - document_yaml_path = os.path.join(unzip_folder_path, "document_yaml") - document_type_entity_list = await DocumentTypeManager.select_by_knowledge_base_id(knowledge_base_entity.id) - document_type_dict = {document_type_entity.type: document_type_entity.id - for document_type_entity in document_type_entity_list} - document_entity_list = [] - - file_path_list = [] - parse_methods=set(ParseMethodEnum.get_all_values()) - for root, _, files in os.walk(document_path): - for file in files: - file_path = os.path.join(root, file) - file_size = os.path.getsize(file_path) - yaml_file_path = os.path.join(document_yaml_path, file+'.yaml') - try: - with open(yaml_file_path, 'r')as yaml_file: - data = 
yaml.safe_load(yaml_file) - except Exception: - logging.error('文件缺少配置文件或者配置文件损坏') - continue - # 写入document表 - file_name = data.get('name', file) - if await DocumentManager.select_by_knowledge_base_id_and_file_name(knowledge_base_entity.id, file_name=file_name): - name = os.path.splitext(file_name)[0] - extension = os.path.splitext(file_name)[1] - if len(name)>=128: - name = name[:128] - file_name = name+'_'+secrets.token_hex(16)+extension - parser_method=data.get('parser_method', knowledge_base_entity.default_parser_method) - if parser_method not in parse_methods: - parser_method=ParseMethodEnum.GENERAL - document_entity = DocumentEntity( - kb_id=knowledge_base_entity.id,\ - user_id=knowledge_base_entity.user_id,\ - name=file_name,\ - extension=data.get('extension', ''),\ - size=file_size,\ - parser_method=parser_method,\ - type_id=document_type_dict.get( - data['type'], - uuid.UUID('00000000-0000-0000-0000-000000000000') - ),\ - chunk_size=data.get( - 'chunk_size', - knowledge_base_entity.default_chunk_size - ),\ - enabled=True,\ - status=DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING - ) - document_entity_list.append(document_entity) - file_path_list.append(file_path) - await DocumentManager.insert_bulk(document_entity_list) - for i in range(len(document_entity_list)): - await MinIO.put_object(OssConstant.MINIO_BUCKET_DOCUMENT, file_index=str(document_entity_list[i].id), - file_path=file_path_list[i]) - # 最后更新下knowledge_base的文档数量和文档大小 - total_cnt,total_sz=await DocumentManager.select_cnt_and_sz_by_kb_id(knowledge_base_entity.id) - update_dict = {'document_number': total_cnt, - 'document_size': total_sz} - await KnowledgeBaseManager.update(knowledge_base_entity.id, update_dict) - return document_entity_list - - @staticmethod - async def handle_import_knowledge_base_task(t_id: uuid.UUID): - unzip_folder_path=None - try: - task_entity=await TaskManager.select_by_id(t_id) - if task_entity.status != TaskConstant.TASK_STATUS_RUNNING: - 
TaskRedisHandler.put_task_by_tail( - config['REDIS_SILENT_ERROR_TASK_QUEUE_NAME'], - str(task_entity.id) - ) - return - # 定义目标目录 - kb_id = task_entity.op_id - knowledge_base_entity = await KnowledgeBaseManager.select_by_id(kb_id) - user_entity = await UserManager.get_user_info_by_user_id(task_entity.user_id) - unzip_folder_path = os.path.join(OssConstant.IMPORT_FILE_SAVE_FOLDER, - str(user_entity.id), secrets.token_hex(16)) - # 创建目录 - if os.path.exists(unzip_folder_path): - shutil.rmtree(unzip_folder_path) - os.makedirs(unzip_folder_path) - zip_file_path = os.path.join(unzip_folder_path, str(kb_id)+'.zip') - # todo 下面两个子过程记录task report - if not await MinIO.download_object(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(kb_id), zip_file_path): - await TaskStatusReportManager.insert( - TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Download knowledge base {kb_id} zip file failed', - current_stage=3, - stage_cnt=3 - ) - ) - await KnowledgeBaseManager.update(kb_id, {'status': KnowledgeStatusEnum.IDLE}) - await TaskManager.update(task_entity.id, {'status': TaskConstant.TASK_STATUS_FAILED}) - TaskRedisHandler.put_task_by_tail(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_entity.id)) - return - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Download knowledge base {kb_id} zip file succcessfully', - current_stage=1, - stage_cnt=3 - )) - if not await ZipHandler.unzip_file(zip_file_path, unzip_folder_path): - await TaskStatusReportManager.insert(TaskStatusReportEntity( - task_id=task_entity.id, - message=f'Unzip knowledge base {kb_id} zip file failed', - current_stage=3, - stage_cnt=3 - )) - await KnowledgeBaseManager.update(kb_id, {'status': KnowledgeStatusEnum.IDLE}) - await TaskManager.update(task_entity.id, {'status': TaskConstant.TASK_STATUS_SUCCESS}) - TaskRedisHandler.put_task_by_tail(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_entity.id)) - return - await 
@staticmethod
async def handle_export_knowledge_base_task(t_id: uuid.UUID):
    """Export a knowledge base as a zip archive and upload it to MinIO.

    Stages reported through ``TaskStatusReportManager`` (``stage_cnt=4``):
      1. write ``knowledge_base.yaml``
      2. download every document and write its per-document yaml
      3. zip the export directory
      4. upload the zip to the knowledge-base bucket

    On any failure a stage-0 report is written and the task id is pushed to
    the restart queue. The temporary export directory is always removed.

    Args:
        t_id: id of the export task to execute.
    """
    # Both are initialised up front so the except/finally paths can never hit
    # an unbound name when the very first lookup fails.
    task_entity = None
    knowledge_yaml_path = None
    try:
        task_entity = await TaskManager.select_by_id(t_id)
        if task_entity is None:
            logging.error(f"Export knowledge base task {t_id} does not exist")
            return
        if task_entity.status != TaskConstant.TASK_STATUS_RUNNING:
            TaskRedisHandler.put_task_by_tail(
                config['REDIS_SILENT_ERROR_TASK_QUEUE_NAME'],
                str(task_entity.id)
            )
            return
        knowledge_base_entity = await KnowledgeBaseManager.select_by_id(task_entity.op_id)
        user_entity = await UserManager.get_user_info_by_user_id(knowledge_base_entity.user_id)
        zip_file_path = os.path.join(OssConstant.ZIP_FILE_SAVE_FOLDER, str(user_entity.id))
        knowledge_yaml_path = os.path.join(zip_file_path, secrets.token_hex(16))
        document_path = os.path.join(knowledge_yaml_path, "document")
        document_yaml_path = os.path.join(knowledge_yaml_path, "document_yaml")
        if not os.path.exists(zip_file_path):
            os.makedirs(zip_file_path)
        if os.path.exists(knowledge_yaml_path):
            shutil.rmtree(knowledge_yaml_path)
        os.makedirs(knowledge_yaml_path)
        os.makedirs(document_path)
        os.makedirs(document_yaml_path)
        document_type_entity_list = await DocumentTypeManager.select_by_knowledge_base_id(knowledge_base_entity.id)

        # Stage 1: write knowledge_base.yaml
        with open(os.path.join(knowledge_yaml_path, "knowledge_base.yaml"), 'w') as knowledge_yaml_file:
            knowledge_dict = {
                # NOTE(review): the key is misspelled ("chuk"); it is kept as-is
                # because whatever reads the archive presumably expects it.
                'default_chuk_size': knowledge_base_entity.default_chunk_size,
                'default_parser_method': knowledge_base_entity.default_parser_method,
                'description': knowledge_base_entity.description,
                'document_number': knowledge_base_entity.document_number,
                'document_size': knowledge_base_entity.document_size,
                'embedding_model': knowledge_base_entity.embedding_model,
                'language': knowledge_base_entity.language,
                'name': knowledge_base_entity.name
            }
            knowledge_dict['document_type_list'] = list(
                {document_type_entity.type for document_type_entity in document_type_entity_list}
            )
            yaml.dump(knowledge_dict, knowledge_yaml_file)
        await TaskStatusReportManager.insert(
            TaskStatusReportEntity(
                task_id=task_entity.id,
                message=f'Save knowledge base yaml from knowledge base {knowledge_base_entity.id} succcessfully',
                current_stage=1,
                stage_cnt=4
            )
        )

        # Stage 2: download the documents and describe each one in a yaml file
        document_type_entity_map = {
            str(document_type_entity.id): document_type_entity.type
            for document_type_entity in document_type_entity_list
        }
        document_entity_list = await DocumentManager.select_by_knowledge_base_id(knowledge_base_entity.id)
        for document_entity in document_entity_list:
            # Best effort: one broken document must not abort the whole export.
            try:
                if not await MinIO.download_object(
                        OssConstant.MINIO_BUCKET_DOCUMENT,
                        str(document_entity.id),
                        os.path.join(document_path, document_entity.name)):
                    continue
                with open(os.path.join(document_yaml_path, document_entity.name+".yaml"), 'w') as yaml_file:
                    doc_type = document_type_entity_map.get(str(document_entity.type_id), "default type")
                    yaml.dump(
                        {
                            'name': document_entity.name,
                            'extension': document_entity.extension,
                            'parser_method': document_entity.parser_method,
                            'chunk_size': document_entity.chunk_size,
                            'type': doc_type
                        },
                        yaml_file)
            except Exception as e:
                logging.error(f"Download document {document_entity.id} failed due to: {e}")
                continue
        await TaskStatusReportManager.insert(
            TaskStatusReportEntity(
                task_id=task_entity.id,
                message=f'Save document and yaml from knowledge base {knowledge_base_entity.id} succcessfully',
                current_stage=2,
                stage_cnt=4
            ))

        # Stage 3: zip the export directory
        save_knowledge_base_zip_file_name = str(task_entity.id)+".zip"
        local_zip_path = os.path.join(zip_file_path, save_knowledge_base_zip_file_name)
        await ZipHandler.zip_dir(knowledge_yaml_path, local_zip_path)
        await TaskStatusReportManager.insert(
            TaskStatusReportEntity(
                task_id=task_entity.id,
                message=f'Zip knowledge base {knowledge_base_entity.id} succcessfully',
                current_stage=3,
                stage_cnt=4
            ))

        # Stage 4: upload the zip to the knowledge-base bucket.
        # NOTE(review): the local zip is never deleted here — confirm whether
        # something else serves or cleans it up.
        res = await MinIO.put_object(
            OssConstant.MINIO_BUCKET_KNOWLEDGEBASE,
            str(task_entity.id),
            local_zip_path
        )
        if res:
            await TaskStatusReportManager.insert(
                TaskStatusReportEntity(
                    task_id=task_entity.id,
                    message=f'Update knowledge base {knowledge_base_entity.id} zip file {save_knowledge_base_zip_file_name} to Minio succcessfully',
                    current_stage=4,
                    stage_cnt=4
                )
            )
        else:
            await TaskStatusReportManager.insert(
                TaskStatusReportEntity(
                    task_id=task_entity.id,
                    message=f'Update knowledge base {knowledge_base_entity.id} zip file {save_knowledge_base_zip_file_name} to Minio failed',
                    current_stage=4,
                    stage_cnt=4
                )
            )
            raise Exception(f'Update knowledge base {knowledge_base_entity.id} zip file {save_knowledge_base_zip_file_name} to Minio failed')
        await KnowledgeBaseManager.update(knowledge_base_entity.id, {'status': KnowledgeStatusEnum.IDLE})
        await TaskManager.update(task_entity.id, {'status': TaskConstant.TASK_STATUS_SUCCESS})
        TaskRedisHandler.put_task_by_tail(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_entity.id))
    except Exception as e:
        # task_entity may still be None when the initial lookup itself raised.
        failed_task_id = task_entity.id if task_entity is not None else t_id
        failed_op_id = task_entity.op_id if task_entity is not None else None
        await TaskStatusReportManager.insert(
            TaskStatusReportEntity(
                task_id=failed_task_id,
                message=f'Export knowledge base {failed_op_id} failed due to {e}',
                current_stage=0,
                stage_cnt=4
            )
        )
        TaskRedisHandler.put_task_by_tail(
            config['REDIS_RESTART_TASK_QUEUE_NAME'],
            str(failed_task_id)
        )
        logging.error(f"Export knowledge base zip files error due to {e}")
    finally:
        if knowledge_yaml_path and os.path.exists(knowledge_yaml_path):
            shutil.rmtree(knowledge_yaml_path)
# --- data_chain/apps/base/task/message_queue.py ---


class TaskQueue:
    """Task queue (placeholder: every hook is currently a no-op)."""
    @staticmethod
    async def init_task_queue():
        pass

    @staticmethod
    async def init_task():
        pass

    @staticmethod
    async def handle_pending_tasks():
        pass

    @staticmethod
    async def handle_running_tasks():
        pass

    @staticmethod
    async def handle_successed_tasks():
        pass

    @staticmethod
    async def handle_failed_tasks():
        pass

    @staticmethod
    async def handle_tasks():
        pass


# --- data_chain/apps/base/task/process_handler.py ---

# Rebinds the module-level name to a 'spawn' context, so every Process/Lock
# below is created from that context rather than the platform default.
multiprocessing = multiprocessing.get_context('spawn')


class ProcessHandler:
    '''Runs async task entry points in child processes, keyed by task id.'''
    tasks = {}  # task_id -> Process
    lock = multiprocessing.Lock()  # guards every access to ``tasks``
    # Half of the CPU cores (at least 1), capped by DOCUMENT_PARSE_USE_CPU_LIMIT.
    max_processes = min(
        max((os.cpu_count() or 1) // 2, 1),
        config['DOCUMENT_PARSE_USE_CPU_LIMIT'])

    @staticmethod
    def subprocess_target(target, *args, **kwargs):
        '''Child-process entry point: run coroutine function ``target`` to completion.

        A fresh event loop is created, installed for the duration of the call
        and always closed afterwards.
        '''
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(target(*args, **kwargs))
        finally:
            loop.close()

    @staticmethod
    def add_task(task_id: uuid.UUID, target, *args, **kwargs):
        '''Start ``target(*args, **kwargs)`` in a new process registered under ``task_id``.

        Returns:
            False when ``max_processes`` entries are already registered;
            True otherwise, including when ``task_id`` is already registered
            (in which case nothing new is started).
        '''
        with ProcessHandler.lock:
            if len(ProcessHandler.tasks) >= ProcessHandler.max_processes:
                warning = f"任务数量已达上限({ProcessHandler.max_processes}),请稍后再试。"
                logging.warning("[ProcessHandler] %s", warning)
                return False

            if task_id not in ProcessHandler.tasks:
                process = multiprocessing.Process(target=ProcessHandler.subprocess_target,
                                                  args=(target,) + args, kwargs=kwargs)
                ProcessHandler.tasks[task_id] = process
                process.start()
            else:
                info = f"任务ID {task_id} 已存在,无法添加。"
                logging.info("[ProcessHandler] %s", info)
        return True

    @staticmethod
    def remove_task(task_id: uuid.UUID):
        '''Kill the process registered under ``task_id`` (if alive) and unregister it.

        Unknown ids are only logged. A failure to kill is logged and the entry
        is removed anyway, so the slot is always released.
        '''
        with ProcessHandler.lock:
            if task_id in ProcessHandler.tasks:
                process = ProcessHandler.tasks[task_id]
                try:
                    if process.is_alive():
                        pid = process.pid
                        # Process.kill() sends SIGKILL on Unix and also works
                        # where signal.SIGKILL does not exist.
                        process.kill()
                        # Reap the child so it does not linger as a zombie;
                        # bounded so the lock is never held indefinitely.
                        process.join(timeout=5)
                        info = f"进程 {task_id} ({pid}) 被杀死。"
                        logging.info("[ProcessHandler] %s", info)
                except Exception as e:
                    warning = f"杀死进程 {task_id} 失败: {e}"
                    logging.warning("[ProcessHandler] %s", warning)
                del ProcessHandler.tasks[task_id]
                info = f"任务ID {task_id} 被删除。"
                logging.info("[ProcessHandler] %s", info)
            else:
                warning = f"任务ID {task_id} 不存在,无法删除。"
                logging.warning("[ProcessHandler] %s", warning)
# Rebinds the module-level name to a 'spawn' context, so every Process/Lock
# below is created from that context rather than the platform default.
multiprocessing = multiprocessing.get_context('spawn')


class TaskHandler:
    """Runs async task entry points in child processes, keyed by task id."""
    tasks = {}  # task_id -> Process
    lock = multiprocessing.Lock()  # guards every access to ``tasks``
    # Half of the CPU cores (at least 1), capped by DOCUMENT_PARSE_USE_CPU_LIMIT.
    max_processes = min(max((os.cpu_count() or 1)//2, 1), config['DOCUMENT_PARSE_USE_CPU_LIMIT'])
    # Written by check_and_adjust_active_count(); declared so it always exists.
    active_count = 0

    @staticmethod
    def subprocess_target(target, *args, **kwargs):
        """Child-process entry point: run coroutine function ``target`` on a fresh event loop."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(target(*args, **kwargs))
        finally:
            loop.close()

    @staticmethod
    def add_task(task_id: uuid.UUID, target, *args, **kwargs):
        """Start ``target(*args, **kwargs)`` in a new process registered under ``task_id``.

        Returns False when ``max_processes`` entries are already registered,
        True otherwise (also when ``task_id`` is already registered).
        """
        with TaskHandler.lock:
            if len(TaskHandler.tasks) >= TaskHandler.max_processes:
                logging.info("Reached maximum number of active processes.")
                return False

            if task_id not in TaskHandler.tasks:
                process = multiprocessing.Process(target=TaskHandler.subprocess_target,
                                                  args=(target,) + args, kwargs=kwargs)
                TaskHandler.tasks[task_id] = process
                process.start()
            else:
                logging.info(f"Task ID {task_id} already exists.")
        return True

    @staticmethod
    def remove_task(task_id: uuid.UUID):
        """Kill the process registered under ``task_id`` (if alive) and unregister it."""
        with TaskHandler.lock:
            if task_id in TaskHandler.tasks:
                process = TaskHandler.tasks[task_id]
                try:
                    if process.is_alive():
                        pid = process.pid
                        process.kill()
                        # Reap the child so it does not linger as a zombie;
                        # bounded so the lock is never held indefinitely.
                        process.join(timeout=5)
                        logging.info(f"Process {task_id} ({pid}) killed.")
                        logging.info(f"Process {task_id} ({pid}) removed.")
                except Exception as e:
                    logging.error(f"Process killed failed due to {e}")
                del TaskHandler.tasks[task_id]
            else:
                logging.info(f"Task ID {task_id} does not exist.")

    @staticmethod
    def get_task(task_id):
        """Return the Process registered under ``task_id``, or None."""
        with TaskHandler.lock:
            return TaskHandler.tasks.get(task_id, None)

    @staticmethod
    def list_tasks():
        """Return a snapshot list of the registered task ids."""
        with TaskHandler.lock:
            return list(TaskHandler.tasks.keys())

    @staticmethod
    def is_alive(task_id):
        """Return True when a process is registered under ``task_id`` and still running."""
        process = TaskHandler.get_task(task_id)
        if process is None:
            # An unknown task is simply "not alive"; no need to log an error for it.
            return False
        try:
            alive = process.is_alive()
        except Exception as e:
            alive = False
            logging.error(f"get process status failed due to {e}")
        return alive

    @staticmethod
    def check_and_adjust_active_count():
        """Recount the live processes into ``TaskHandler.active_count``."""
        with TaskHandler.lock:
            TaskHandler.active_count = sum(process.is_alive() for process in TaskHandler.tasks.values())

    @staticmethod
    async def restart_or_clear_task(task_id: uuid.UUID, method=TaskActionEnum.RESTART):
        """Stop a task's process, then either re-queue it or clear its artefacts.

        A RESTART with fewer than 3 retries so far bumps the retry counter,
        resets the operated object and pushes the task back on the pending
        queue. Otherwise (retries exhausted, CANCEL or DELETE) the task is
        removed from all queues, marked FAILED / CANCELED / DELETED and the
        artefacts of the operated object are cleaned up.
        """
        TaskHandler.remove_task(task_id)
        task_entity = await TaskManager.select_by_id(task_id)
        if task_entity is None:
            return
        op_id = task_entity.op_id
        task_type = task_entity.type
        if task_entity.retry < 3 and method == TaskActionEnum.RESTART:
            await TaskManager.update(task_id, {"retry": task_entity.retry+1})
            if task_type == TaskConstant.PARSE_DOCUMENT:
                await DocumentManager.update(op_id, {'status': DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING})
                await ChunkManager.delete_by_document_ids([op_id])
            elif task_type == TaskConstant.PARSE_TEMPORARY_DOCUMENT:
                await TemporaryChunkManager.delete_by_temporary_document_ids([op_id])
            elif task_type == TaskConstant.IMPORT_KNOWLEDGE_BASE:
                await KnowledgeBaseManager.delete(op_id)
            elif task_type == TaskConstant.EXPORT_KNOWLEDGE_BASE:
                await KnowledgeBaseManager.update(op_id, {'status': KnowledgeStatusEnum.EXPROTING})
            await TaskManager.update(task_id, {"status": TaskConstant.TASK_STATUS_PENDING})
            TaskRedisHandler.put_task_by_tail(config['REDIS_PENDING_TASK_QUEUE_NAME'], str(task_id))
        elif method in (TaskActionEnum.RESTART, TaskActionEnum.CANCEL, TaskActionEnum.DELETE):
            TaskRedisHandler.remove_task_by_task_id(config['REDIS_PENDING_TASK_QUEUE_NAME'], str(task_id))
            TaskRedisHandler.remove_task_by_task_id(config['REDIS_SUCCESS_TASK_QUEUE_NAME'], str(task_id))
            TaskRedisHandler.remove_task_by_task_id(config['REDIS_RESTART_TASK_QUEUE_NAME'], str(task_id))
            if method == TaskActionEnum.CANCEL:
                await TaskManager.update(task_id, {"status": TaskConstant.TASK_STATUS_CANCELED})
            elif method == TaskActionEnum.DELETE:
                await TaskManager.update(task_id, {"status": TaskConstant.TASK_STATUS_DELETED})
            else:
                # RESTART with the retry budget exhausted.
                await TaskManager.update(task_id, {"status": TaskConstant.TASK_STATUS_FAILED})
            if task_type == TaskConstant.PARSE_DOCUMENT:
                await DocumentManager.update(op_id, {'status': DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING})
                await ChunkManager.delete_by_document_ids([op_id])
            elif task_type == TaskConstant.PARSE_TEMPORARY_DOCUMENT:
                await TemporaryChunkManager.delete_by_temporary_document_ids([op_id])
            elif task_type == TaskConstant.IMPORT_KNOWLEDGE_BASE:
                await KnowledgeBaseManager.delete(op_id)
                await MinIO.delete_object(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(task_entity.op_id))
            elif task_type == TaskConstant.EXPORT_KNOWLEDGE_BASE:
                await KnowledgeBaseManager.update(op_id, {'status': KnowledgeStatusEnum.IDLE})
                await MinIO.delete_object(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(task_id))


class TaskRedisHandler():
    """Thin helpers over Redis lists used as task queues.

    Every helper logs Redis errors instead of raising them.
    """

    @staticmethod
    def clear_all_task(queue_name: str) -> None:
        """Delete the whole queue."""
        with RedisConnectionPool.get_redis_connection() as r:
            try:
                r.delete(queue_name)
            except Exception as e:
                logging.error(f"Clear queue error: {e}")

    @staticmethod
    def select_all_task(queue_name: str) -> List[str]:
        """Return every entry of the queue, or [] when Redis fails."""
        with RedisConnectionPool.get_redis_connection() as r:
            try:
                return r.lrange(queue_name, 0, -1)
            except Exception as e:
                logging.error(f"Select task error: {e}")
                return []

    @staticmethod
    def get_task_by_head(queue_name: str):
        """Pop and return the entry at the head of the queue, or None when Redis fails."""
        with RedisConnectionPool.get_redis_connection() as r:
            try:
                return r.lpop(queue_name)
            except Exception as e:
                logging.error(f"Get first task error: {e}")
                return None

    @staticmethod
    def put_task_by_tail(queue_name: str, task_id: str):
        """Append ``task_id`` at the tail of the queue."""
        with RedisConnectionPool.get_redis_connection() as r:
            try:
                return r.rpush(queue_name, task_id)
            except Exception as e:
                logging.error(f"Put task error: {e}")

    @staticmethod
    def remove_task_by_task_id(queue_name: str, task_id: str):
        """Remove every occurrence of ``task_id`` from the queue."""
        with RedisConnectionPool.get_redis_connection() as r:
            try:
                return r.lrem(queue_name, 0, task_id)
            except Exception as e:
                logging.error(f"Remove task error: {e}")
class TestingWorker(BaseWorker):
    """
    Worker that runs an accuracy test: it answers every QA pair of a dataset
    through retrieval + LLM, scores the answers, stores the test cases and
    uploads an xlsx report to MinIO.
    """
    name = TaskType.TESTING_RUN.value

    @staticmethod
    def _blank_scores() -> dict:
        '''Aggregate-score columns reset to -1, the value used for "not computed".'''
        return {
            "ave_score": -1,
            "ave_pre": -1,
            "ave_rec": -1,
            "ave_fai": -1,
            "ave_rel": -1,
            "ave_lcs": -1,
            "ave_leve": -1,
            "ave_jac": -1,
        }

    @staticmethod
    def _average(values) -> float:
        '''Mean of the values different from -1, or -1 when none is left.'''
        valid = [value for value in values if value != -1]
        return sum(valid) / len(valid) if valid else -1

    @staticmethod
    async def init(testing_id: uuid.UUID) -> uuid.UUID:
        '''Create the task for a testing run.

        Marks the testing as pending, inserts the task row, resets the
        aggregate scores and removes any previous report from MinIO.

        Returns:
            The id of the new task, or None when the testing does not exist.
        '''
        testing_entity = await TestingManager.get_testing_by_testing_id(testing_id)
        if testing_entity is None:
            err = f"[TestingWorker] 测试不存在,测试ID: {testing_id}"
            # Not inside an except block, so there is no traceback to attach.
            logging.error(err)
            return None
        # TestingStatus (not DataSetStatus) is the enum every other method of
        # this worker uses for the testing's status.
        testing_entity = await TestingManager.update_testing_by_testing_id(testing_id, {"status": TestingStatus.PENDING.value})
        task_entity = TaskEntity(
            team_id=testing_entity.team_id,
            user_id=testing_entity.author_id,
            op_id=testing_entity.id,
            op_name=testing_entity.name,
            type=TaskType.TESTING_RUN.value,
            retry=0,
            status=TaskStatus.PENDING.value)
        task_entity = await TaskManager.add_task(task_entity)
        await TestingManager.update_testing_by_testing_id(testing_id, TestingWorker._blank_scores())
        await MinIO.delete_object(
            bucket_name=TESTING_REPORT_PATH_IN_MINIO,
            object_name=str(testing_id)
        )
        return task_entity.id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        '''Reset a task's artefacts so that it can be retried.

        Removes the temporary directory, marks the stored test cases as
        deleted, deletes the report and resets the aggregate scores.

        Returns:
            True when the retry budget (TASK_RETRY_TIME_LIMIT) is not exhausted
            and the testing was put back to pending; False otherwise (the
            testing is set idle, or the task does not exist).
        '''
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[TestingWorker] 任务不存在,task_id: {task_id}"
            logging.error(err)
            return False
        tmp_path = os.path.join(TESTING_REPORT_PATH_IN_OS, str(task_entity.id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        await TestCaseManager.update_test_case_by_testing_id(task_entity.op_id, {"status": TestCaseStatus.DELETED.value})
        await MinIO.delete_object(
            bucket_name=TESTING_REPORT_PATH_IN_MINIO,
            object_name=str(task_entity.op_id)
        )
        await TestingManager.update_testing_by_testing_id(task_entity.op_id, TestingWorker._blank_scores())
        if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
            await TestingManager.update_testing_by_testing_id(task_entity.op_id, {"status": TestingStatus.PENDING.value})
            return True
        await TestingManager.update_testing_by_testing_id(task_entity.op_id, {"status": TestingStatus.IDLE.value})
        return False

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        '''Finalize a task: drop its temporary directory and set the testing idle.

        Returns:
            ``task_id``, or None when the task does not exist.
        '''
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[TestingWorker] 任务不存在,task_id: {task_id}"
            logging.error(err)
            return None
        tmp_path = os.path.join(TESTING_REPORT_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        await TestingManager.update_testing_by_testing_id(task_entity.op_id, {"status": TestingStatus.IDLE.value})
        return task_id

    @staticmethod
    async def init_path(task_id: uuid.UUID) -> str:
        '''Create an empty temporary directory for the task and return its path.'''
        tmp_path = os.path.join(TESTING_REPORT_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        os.makedirs(tmp_path)
        return tmp_path

    @staticmethod
    async def testing(testing_entity: TestingEntity, qa_entities: list[QAEntity], llm: LLM) -> list[TestCaseEntity]:
        '''Answer and score every QA pair, store the test cases and return them.

        For each QA pair the question is searched with the testing's search
        method, the hits are grouped per document and concatenated into the
        background text, the LLM answers from that text, and seven metrics are
        computed. A metric that comes back falsy is stored as -1 and left out
        of the overall score.
        '''
        test_case_entities = []
        with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f:
            prompt_dict = yaml.load(f, Loader=yaml.SafeLoader)
        # NOTE(review): an intent-detection template is used to answer
        # questions — confirm this is the intended prompt key.
        prompt_template = prompt_dict.get('INTENT_DETECT_PROMPT_TEMPLATE', '')
        for qa_entity in qa_entities:
            question = qa_entity.question
            answer = qa_entity.answer
            chunk = qa_entity.chunk
            found_chunks = await BaseSearcher.search(testing_entity.search_method, question)
            doc_chunk_dict = {}
            for chunk_entity in found_chunks:
                doc_chunk_dict.setdefault(chunk_entity.doc_id, []).append(chunk_entity)
            bac_info = ''
            for doc_id, doc_chunks in doc_chunk_dict.items():
                doc_chunks.sort(key=lambda x: x.global_offset)
                document_entity = await DocumentManager.get_document_by_doc_id(doc_id)
                sub_bac_info = f"文档名称: {document_entity.name}\n"
                sub_bac_info += ''.join(chunk_entity.text for chunk_entity in doc_chunks)
                bac_info += sub_bac_info+'\n'
            bac_info = TokenTool.get_k_tokens_words_from_content(bac_info, llm.max_tokens)
            prompt = prompt_template.format(
                question=question,
                bac_info=bac_info
            )
            llm_answer = await llm.nostream([], prompt, '请输出答案')
            raw_scores = {
                'pre': await TokenTool.cal_precision(question, bac_info, llm),
                'rec': await TokenTool.cal_recall(answer, llm_answer, llm),
                'fai': await TokenTool.cal_faithfulness(question, llm_answer, bac_info, llm),
                'rel': await TokenTool.cal_relevance(question, llm_answer, llm),
                'lcs': TokenTool.cal_lcs(answer, llm_answer),
                'leve': TokenTool.cal_leve(answer, llm_answer),
                'jac': TokenTool.cal_jac(answer, llm_answer),
            }
            # NOTE(review): a genuine score of 0 is falsy and is therefore
            # treated like a failed metric — confirm that this is intended.
            valid_scores = [value for value in raw_scores.values() if value]
            score = sum(valid_scores) / len(valid_scores) if valid_scores else -1
            scores = {key: (value if value else -1) for key, value in raw_scores.items()}
            # NOTE(review): no testing id is set on the test case although
            # TestCaseManager.update_test_case_by_testing_id exists — verify how
            # test cases get linked to their testing.
            test_case_entity = TestCaseEntity(
                qa_id=qa_entity.id,
                question=question,
                answer=answer,
                chunk=chunk,
                doc_name=qa_entity.doc_name,
                llm_answer=llm_answer,
                related_chunk=bac_info,
                score=score,
                pre=scores['pre'],
                rec=scores['rec'],
                fai=scores['fai'],
                rel=scores['rel'],
                lcs=scores['lcs'],
                leve=scores['leve'],
                jac=scores['jac']
            )
            test_case_entities.append(test_case_entity)
        # Insert in batches of 1024 rows.
        for index in range(0, len(test_case_entities), 1024):
            await TestCaseManager.add_test_cases(test_case_entities[index:index+1024])
        return test_case_entities

    @staticmethod
    async def update_testing_score(testing_id: uuid.UUID, test_case_entities: list[TestCaseEntity]) -> None:
        '''Store the per-metric averages of the test cases on the testing.

        Values equal to -1 are ignored; a metric without any other value is
        stored as -1.
        '''
        average = TestingWorker._average
        await TestingManager.update_testing_by_testing_id(testing_id, {
            "ave_score": average(test_case.score for test_case in test_case_entities),
            "ave_pre": average(test_case.pre for test_case in test_case_entities),
            "ave_rec": average(test_case.rec for test_case in test_case_entities),
            "ave_fai": average(test_case.fai for test_case in test_case_entities),
            "ave_rel": average(test_case.rel for test_case in test_case_entities),
            "ave_lcs": average(test_case.lcs for test_case in test_case_entities),
            "ave_leve": average(test_case.leve for test_case in test_case_entities),
            "ave_jac": average(test_case.jac for test_case in test_case_entities)
        })

    @staticmethod
    async def generate_report_and_upload_to_minio(
            dataset_entity: DataSetEntity, testing_entity: TestingEntity, testcase_entities: list[TestCaseEntity],
            tmp_path: str):
        '''Write ``report.xlsx`` into ``tmp_path`` and upload it to MinIO.

        The workbook has three sheets: configuration, average results and one
        row per test case. It is stored under the testing id.
        '''
        xlsx_path = os.path.join(tmp_path, "report.xlsx")
        testing_doc_entities = await DatasetManager.list_dataset_document_by_dataset_id(testing_entity.dataset_id)
        doc_ids = [testing_doc_entity.doc_id for testing_doc_entity in testing_doc_entities]
        chunk_cnt = await ChunkManager.get_chunk_cnt_by_doc_ids(doc_ids)
        chunk_tokens = await ChunkManager.get_chunk_tokens_by_doc_ids(doc_ids)
        ave_chunk_tokens = 0
        if chunk_cnt != 0:
            ave_chunk_tokens = chunk_tokens / chunk_cnt
        # Must not be named ``config``: assigning that name here would make it
        # local to the function and the config[...] lookups below would raise
        # UnboundLocalError.
        model_config = {
            'dataset_name(数据集名称)': dataset_entity.name,
            'doc_cnt(文档数量)': len(testing_doc_entities),
            'chunk_cnt(分片数量)': chunk_cnt,
            'chunk_tokens(分片平均token数)': ave_chunk_tokens,
            'llm(大模型)': [config['MODEL_NAME']],
            'embedding_model(向量检索)': [config['EMBEDDING_MODEL_NAME']],
        }
        model_config_df = pd.DataFrame(model_config)
        ave_result = {
            # ``ave_score`` is the field update_testing_score() writes.
            'ave_score(平均综合得分)': [testing_entity.ave_score],
            'ave_pre(平均准确率)': [testing_entity.ave_pre],
            'ave_rec(平均召回率)': [testing_entity.ave_rec],
            'ave_fai(平均可信度)': [testing_entity.ave_fai],
            'ave_rel(平均相关度)': [testing_entity.ave_rel],
            'ave_lcs(平均最长公共子序列得分)': [testing_entity.ave_lcs],
            'ave_leve(平均编辑距离得分)': [testing_entity.ave_leve],
            'ave_jac(平均杰卡德相似度)': [testing_entity.ave_jac]
        }
        ave_result_df = pd.DataFrame(ave_result)
        # Report column -> test-case attribute, in column order.
        columns = {
            'question': 'question',
            'answer': 'answer',
            'chunk': 'chunk',
            'doc_name': 'doc_name',
            'llm_answer': 'llm_answer',
            'related_chunk': 'related_chunk',
            'score(综合得分)': 'score',
            'pre(准确率)': 'pre',
            'rec(召回率)': 'rec',
            'fai(可信度)': 'fai',
            'rel(相关性)': 'rel',
            'lcs(最长公共子序列得分)': 'lcs',
            'leve(编辑距离得分)': 'leve',
            'jac(杰卡德相似度)': 'jac'
        }
        test_case_dict = {
            column: [getattr(test_case_entity, attr) for test_case_entity in testcase_entities]
            for column, attr in columns.items()
        }
        test_case_df = pd.DataFrame(test_case_dict)
        with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
            model_config_df.to_excel(writer, sheet_name='config(配置)', index=False)
            ave_result_df.to_excel(writer, sheet_name='ave_result(平均结果)', index=False)
            test_case_df.to_excel(writer, sheet_name='test_case(测试结果)', index=False)
        # The file to upload has to be handed over as well.
        # NOTE(review): passed positionally as (bucket, object, file path) —
        # confirm against MinIO.put_object's signature.
        await MinIO.put_object(
            TESTING_REPORT_PATH_IN_MINIO,
            str(testing_entity.id),
            xlsx_path
        )

    @staticmethod
    async def run(task_id: uuid.UUID) -> None:
        '''Execute the testing task end to end.

        Stages (reported through ``report``): prepare the temporary path, run
        the test cases, update the aggregate scores, generate and upload the
        report. The outcome is published on the task queue as SUCCESS or
        FAILED; no exception leaves this method.
        '''
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                err = f"[TestingWorker] 任务不存在,task_id: {task_id}"
                # Only exception instances can be raised, not plain strings.
                raise ValueError(err)
            testing_entity = await TestingManager.get_testing_by_testing_id(task_entity.op_id)
            if testing_entity is None:
                err = f"[TestingWorker] 测试不存在,测试ID: {task_entity.op_id}"
                raise ValueError(err)
            current_stage = 0
            stage_cnt = 4
            llm = LLM(
                openai_api_key=config['OPENAI_API_KEY'],
                openai_api_base=config['OPENAI_API_BASE'],
                model_name=config['MODEL_NAME'],
                max_tokens=config['MAX_TOKENS'],
            )
            tmp_path = await TestingWorker.init_path(task_id)
            current_stage += 1
            await TestingWorker.report(task_id, "初始化路径", current_stage, stage_cnt)
            qa_entities = await QAManager.list_all_qa_by_dataset_id(testing_entity.dataset_id)
            test_case_entities = await TestingWorker.testing(testing_entity, qa_entities, llm)
            current_stage += 1
            await TestingWorker.report(task_id, "测试完成", current_stage, stage_cnt)
            # The scored test cases (not the QA pairs) carry the metrics.
            await TestingWorker.update_testing_score(testing_entity.id, test_case_entities)
            current_stage += 1
            await TestingWorker.report(task_id, "更新测试分数", current_stage, stage_cnt)
            dataset_entity = await DatasetManager.get_dataset_by_dataset_id(testing_entity.dataset_id)
            # Reload so the report shows the averages that were just stored.
            refreshed_entity = await TestingManager.get_testing_by_testing_id(testing_entity.id)
            if refreshed_entity is not None:
                testing_entity = refreshed_entity
            await TestingWorker.generate_report_and_upload_to_minio(
                dataset_entity, testing_entity, test_case_entities, tmp_path)
            current_stage += 1
            await TestingWorker.report(task_id, "生成报告并上传到minio", current_stage, stage_cnt)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value))
        except Exception as e:
            err = f"[TestingWorker] 任务失败,task_id: {task_id}, 错误信息: {e}"
            logging.exception(err)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value))
            await TestingWorker.report(task_id, "任务失败", 0, 1)

    @staticmethod
    async def stop(task_id: uuid.UUID) -> uuid.UUID:
        '''Stop a task: set the testing idle and drop the temporary directory.

        When the task was pending, running or failed, its test cases, report
        and aggregate scores are discarded too.

        Returns:
            ``task_id``, or None when the task does not exist.
        '''
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[TestingWorker] 任务不存在,task_id: {task_id}"
            logging.error(err)
            return None
        await TestingManager.update_testing_by_testing_id(task_entity.op_id, {"status": TestingStatus.IDLE.value})
        tmp_path = os.path.join(TESTING_REPORT_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        unfinished_statuses = (TaskStatus.PENDING.value, TaskStatus.RUNNING.value, TaskStatus.FAILED.value)
        if task_entity.status in unfinished_statuses:
            await TestCaseManager.update_test_case_by_testing_id(task_entity.op_id, {"status": TestCaseStatus.DELETED.value})
            await MinIO.delete_object(
                bucket_name=TESTING_REPORT_PATH_IN_MINIO,
                object_name=str(task_entity.op_id)
            )
            await TestingManager.update_testing_by_testing_id(task_entity.op_id, TestingWorker._blank_scores())
        return task_id
class BaseWorker:
    """Dispatcher that routes task lifecycle calls to the concrete worker.

    Every concrete worker subclasses ``BaseWorker`` and sets ``name`` to the
    task type it handles. The static methods here look the worker up by that
    name, call the matching hook on it, and then record the resulting task
    status through ``TaskManager``.
    """
    name = "BaseWorker"

    @staticmethod
    def find_worker_class(worker_name):
        """Return the direct subclass whose ``name`` equals *worker_name*.

        Only classes that have already been imported are visible to
        ``__subclasses__``. Returns ``None`` when nothing matches.
        """
        for subclass in BaseWorker.__subclasses__():
            if subclass.name == worker_name:
                return subclass
        return None

    @staticmethod
    def _get_worker_hook(worker_name, hook_name):
        """Return the hook *hook_name* implemented by the worker *worker_name*.

        Raises:
            ValueError: no worker is registered under *worker_name*.
            NotImplementedError: the worker does not override the hook. The
                inherited attribute would be this dispatcher itself, and
                calling it would recurse without end.
        """
        worker_class = BaseWorker.find_worker_class(worker_name)
        if worker_class is None:
            err = f"未找到对应的worker, worker名称: {worker_name}"
            logging.error("[BaseWorker] %s", err)
            raise ValueError(err)
        hook = getattr(worker_class, hook_name)
        if hook is getattr(BaseWorker, hook_name):
            err = f"worker未实现{hook_name}, worker名称: {worker_name}"
            logging.error("[BaseWorker] %s", err)
            raise NotImplementedError(err)
        return hook

    @staticmethod
    async def get_worker_name(task_id: uuid.UUID) -> str:
        """Return the type of the task, which doubles as the worker name.

        Raises:
            ValueError: the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"获取任务失败, 任务ID: {task_id}"
            logging.error("[BaseWorker] %s", err)
            raise ValueError(err)
        return task_entity.type

    @staticmethod
    async def init(worker_name: str, op_id: uuid.UUID) -> uuid.UUID:
        """Create a task for *op_id* through the named worker.

        Returns:
            The new task ID, or ``None`` when the worker refused to create
            the task.
        """
        worker_init = BaseWorker._get_worker_hook(worker_name, "init")
        task_id = await worker_init(op_id)
        if task_id is None:
            # The worker already logged why; there is no task row to update.
            return None
        await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.PENDING.value})
        return task_id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        """Prepare a task for another attempt.

        Returns:
            ``True`` when the task was put back to ``PENDING`` with its retry
            counter incremented, ``False`` when the worker declined and the
            task was marked ``FAILED``.
        """
        worker_name = await BaseWorker.get_worker_name(task_id)
        worker_reinit = BaseWorker._get_worker_hook(worker_name, "reinit")
        flag = await worker_reinit(task_id)
        if not flag:
            await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.FAILED.value})
            return False
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        # This coroutine must be awaited, otherwise the status/retry update
        # is never executed.
        await TaskManager.update_task_by_id(
            task_id, {"status": TaskStatus.PENDING.value, "retry": task_entity.retry + 1})
        return True

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        """Finalize a finished task and mark it ``SUCCESS``; returns *task_id*."""
        worker_name = await BaseWorker.get_worker_name(task_id)
        worker_deinit = BaseWorker._get_worker_hook(worker_name, "deinit")
        await worker_deinit(task_id)
        await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.SUCCESS.value})
        return task_id

    @staticmethod
    async def run(task_id: uuid.UUID) -> bool:
        """Hand the worker's ``run`` hook to ``ProcessHandler`` and mark the task ``RUNNING``.

        Returns:
            Whatever ``ProcessHandler.add_task`` returned.
        """
        worker_name = await BaseWorker.get_worker_name(task_id)
        worker_run = BaseWorker._get_worker_hook(worker_name, "run")
        # NOTE(review): the status becomes RUNNING even when ``flag`` is falsy;
        # confirm the caller resets the task when scheduling fails.
        flag = ProcessHandler.add_task(worker_run, task_id)
        await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.RUNNING.value})
        return flag

    @staticmethod
    async def stop(task_id: uuid.UUID) -> bool:
        """Cancel a pending or running task.

        Returns:
            ``True`` when the task was cancelled. ``False`` when it was in any
            other state or the worker could not stop it.
        """
        worker_name = await BaseWorker.get_worker_name(task_id)
        worker_stop = BaseWorker._get_worker_hook(worker_name, "stop")
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity.status == TaskStatus.RUNNING.value:
            await ProcessHandler.remove_task(task_id)
        elif task_entity.status == TaskStatus.PENDING.value:
            await TaskQueueManager.delete_task_by_id(task_id)
        else:
            return False
        stopped_task_id = await worker_stop(task_id)
        if stopped_task_id is None:
            # The worker found no task, so there is nothing to update.
            return False
        await TaskManager.update_task_by_id(stopped_task_id, {"status": TaskStatus.CANCLED.value})
        return True

    @staticmethod
    async def delete(task_id: uuid.UUID) -> bool:
        """Delete a task; returns ``True`` when it was marked ``DELETED``."""
        worker_name = await BaseWorker.get_worker_name(task_id)
        worker_delete = BaseWorker._get_worker_hook(worker_name, "delete")
        deleted_task_id = await worker_delete(task_id)
        if deleted_task_id is None:
            return False
        await TaskManager.update_task_by_id(deleted_task_id, {"status": TaskStatus.DELETED.value})
        return True

    @staticmethod
    async def report(task_id: uuid.UUID, report: str, current_stage: int, stage_cnt: int) -> bool:
        """Store a progress report (message plus stage counters) for the task."""
        task_report_entity = TaskReportEntity(
            task_id=task_id,
            message=report,
            current_stage=current_stage,
            stage_cnt=stage_cnt
        )
        await TaskReportManager.add_task_report(task_report_entity)
        return True
class ExportDataSetWorker(BaseWorker):
    """Worker that exports a dataset's QA pairs as a zip archive in MinIO.

    The archive holds the same QA pairs as JSON, YAML and XLSX files and is
    stored in the ``EXPORT_DATASET_PATH_IN_MINIO`` bucket under the task ID.
    """
    name = TaskType.DATASET_EXPORT.value

    @staticmethod
    async def init(dataset_id: uuid.UUID) -> uuid.UUID:
        """Create an export task for the dataset.

        Returns:
            The new task ID, or ``None`` when the dataset does not exist.
        """
        dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id)
        if dataset_entity is None:
            err = f"[ExportDataSetWorker] 数据集不存在,数据集ID: {dataset_id}"
            logging.error("%s", err)
            return None
        dataset_entity = await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"status": DataSetStatus.PENDING.value})
        task_entity = TaskEntity(
            team_id=dataset_entity.team_id,
            user_id=dataset_entity.author_id,
            op_id=dataset_entity.id,
            op_name=dataset_entity.name,
            type=TaskType.DATASET_EXPORT.value,
            retry=0,
            status=TaskStatus.PENDING.value)
        task_entity = await TaskManager.add_task(task_entity)
        return task_entity.id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        """Discard the artifacts of a previous attempt and decide on a retry.

        Returns:
            ``True`` when the retry limit is not reached and the dataset was
            put back to ``PENDING``; ``False`` otherwise (dataset set to
            ``IDLE``) or when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return False
        tmp_path = os.path.join(EXPORT_DATASET_PATH_IN_OS, str(task_entity.id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        await MinIO.delete_object(
            bucket_name=EXPORT_DATASET_PATH_IN_MINIO,
            object_name=str(task_entity.id)
        )
        if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
            await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.PENDING.value})
            return True
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        return False

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        """Release the dataset after a successful export.

        Sets the dataset back to ``IDLE`` and removes the task's local
        working directory; the archive itself already lives in MinIO.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        tmp_path = os.path.join(EXPORT_DATASET_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def init_path(task_id: uuid.UUID) -> tuple:
        """Create a fresh working directory tree for the task.

        Returns:
            ``(tmp_path, source_path, target_path)``: the task directory, the
            directory the export files are written to, and the directory the
            zip archive is written to.
        """
        tmp_path = os.path.join(EXPORT_DATASET_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        # Keep both directories inside the per-task directory so concurrent
        # exports cannot mix their files and cleanup removes everything.
        source_path = os.path.join(tmp_path, 'source')
        target_path = os.path.join(tmp_path, 'zip')
        os.makedirs(source_path)
        os.makedirs(target_path)
        return tmp_path, source_path, target_path

    @staticmethod
    async def writ_qa_entity_to_file(
            task_id: uuid.UUID, dataset_entity: DataSetEntity, qa_entities: list[QAEntity],
            source_path: str, target_path: str) -> str:
        """Write the QA pairs as XLSX, JSON and YAML and zip the result.

        Args:
            task_id: Used as the archive's file name.
            dataset_entity: Its ``name`` is used as the export file stem.
            qa_entities: QA pairs to export.
            source_path: Directory the three export files are written to.
            target_path: Directory the zip archive is written to.

        Returns:
            Path of the created zip archive.
        """
        os.makedirs(source_path, exist_ok=True)
        os.makedirs(target_path, exist_ok=True)
        json_path = os.path.join(source_path, f"{dataset_entity.name}.json")
        yaml_path = os.path.join(source_path, f"{dataset_entity.name}.yaml")
        xlsx_path = os.path.join(source_path, f"{dataset_entity.name}.xlsx")
        qa_list = [
            {
                'question': qa_entity.question,
                'answer': qa_entity.answer,
                'chunk': qa_entity.chunk
            }
            for qa_entity in qa_entities
        ]
        # Explicit columns keep the header row even for an empty dataset.
        qa_df = pd.DataFrame(qa_list, columns=['question', 'answer', 'chunk'])
        with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
            qa_df.to_excel(writer, sheet_name='qac', index=False)
        with open(json_path, 'w', encoding='utf-8') as f:
            # Keep non-ASCII text readable, matching the YAML export below.
            json.dump(qa_list, f, indent=4, ensure_ascii=False)
        with open(yaml_path, 'w', encoding='utf-8') as f:
            yaml.dump(qa_list, f, allow_unicode=True)
        zip_path = os.path.join(target_path, str(task_id)+'.zip')
        await ZipHandler.zip_dir(source_path, zip_path)
        return zip_path

    @staticmethod
    async def upload_file_to_minio(
            task_id: uuid.UUID, zip_path: str) -> None:
        """Upload the archive to MinIO under the task ID."""
        await MinIO.put_object(
            bucket_name=EXPORT_DATASET_PATH_IN_MINIO,
            object_name=str(task_id),
            file_path=zip_path
        )

    @staticmethod
    async def run(task_id: uuid.UUID) -> None:
        """Execute the export and queue the final task status.

        Any failure is logged, reported, and turned into a ``FAILED`` status
        message; nothing is raised to the caller.
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                # Raise a real exception; raising the bare string would fail
                # with an unrelated TypeError.
                raise ValueError(f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}")
            dataset_entity = await DatasetManager.get_dataset_by_dataset_id(task_entity.op_id)
            if dataset_entity is None:
                raise ValueError(f"[ExportDataSetWorker] 数据集不存在,数据集ID: {task_entity.op_id}")
            current_stage = 0
            stage_cnt = 3
            tmp_path, source_path, target_path = await ExportDataSetWorker.init_path(task_id)
            current_stage += 1
            await ExportDataSetWorker.report(task_id, "正在导出数据集", current_stage, stage_cnt)
            qa_entities = await QAManager.list_all_qa_by_dataset_id(dataset_entity.id)
            zip_path = await ExportDataSetWorker.writ_qa_entity_to_file(
                task_id, dataset_entity, qa_entities, source_path, target_path)
            current_stage += 1
            await ExportDataSetWorker.report(task_id, "将qa对写入文件", current_stage, stage_cnt)
            await ExportDataSetWorker.upload_file_to_minio(task_id, zip_path)
            current_stage += 1
            await ExportDataSetWorker.report(task_id, "上传文件到minio", current_stage, stage_cnt)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value))
        except Exception as e:
            err = f"[ExportDataSetWorker] 任务失败,task_id: {task_id}, 错误信息: {e}"
            logging.exception(err)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value))
            await ExportDataSetWorker.report(task_id, "任务失败", 0, 1)

    @staticmethod
    async def stop(task_id: uuid.UUID) -> uuid.UUID:
        """Release the dataset and remove the task's local working directory.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        tmp_path = os.path.join(EXPORT_DATASET_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def delete(task_id: uuid.UUID) -> uuid.UUID:
        """Remove the archive of a cancelled or failed export from MinIO.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        # NOTE(review): the previous ``status == A or B`` test was always
        # true; this implements the condition it spelled out. Confirm whether
        # archives of successful exports should be removed as well.
        if task_entity.status in (TaskStatus.CANCLED.value, TaskStatus.FAILED.value):
            # The archive is uploaded under the task ID, so delete that key.
            await MinIO.delete_object(
                bucket_name=EXPORT_DATASET_PATH_IN_MINIO,
                object_name=str(task_id)
            )
        return task_id
class ExportKnowledgeBaseWorker(BaseWorker):
    """Worker that exports a knowledge base as a zip archive in MinIO.

    The archive contains the knowledge base configuration, one YAML config
    per document, and the document files themselves. It is stored in the
    ``EXPORT_KB_PATH_IN_MINIO`` bucket under the task ID.
    """
    name = TaskType.KB_EXPORT.value

    @staticmethod
    async def init(kb_id: uuid.UUID) -> uuid.UUID:
        """Create an export task for the knowledge base.

        Returns:
            The new task ID, or ``None`` when the knowledge base does not
            exist or is not ``IDLE``.
        """
        knowledge_base_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
        if knowledge_base_entity is None:
            err = f"[ExportKnowledgeBaseWorker] 知识库不存在,知识库ID: {kb_id}"
            logging.error("%s", err)
            return None
        if knowledge_base_entity.status != KnowledgeBaseStatus.IDLE.value:
            warning = f"[ExportKnowledgeBaseWorker] 无法导出知识库,知识库ID: {kb_id},知识库状态: {knowledge_base_entity.status}"
            logging.warning(warning)
            return None
        knowledge_base_entity = await KnowledgeBaseManager.update_knowledge_base_by_kb_id(kb_id, {"status": KnowledgeBaseStatus.PENDING.value})
        task_entity = TaskEntity(
            team_id=knowledge_base_entity.team_id,
            user_id=knowledge_base_entity.author_id,
            op_id=knowledge_base_entity.id,
            op_name=knowledge_base_entity.name,
            type=TaskType.KB_EXPORT.value,
            retry=0,
            status=TaskStatus.PENDING.value)
        task_entity = await TaskManager.add_task(task_entity)
        await MinIO.delete_object(
            bucket_name=EXPORT_KB_PATH_IN_MINIO,
            object_name=str(task_entity.id)
        )
        return task_entity.id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        """Discard the artifacts of a previous attempt and decide on a retry.

        Returns:
            ``True`` when the retry limit is not reached and the knowledge
            base was put back to ``PENDING``; ``False`` otherwise (knowledge
            base set to ``IDLE``) or when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return False
        tmp_path = os.path.join(EXPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        # One deletion covers both branches below.
        await MinIO.delete_object(
            bucket_name=EXPORT_KB_PATH_IN_MINIO,
            object_name=str(task_entity.id)
        )
        if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
            await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.PENDING.value})
            return True
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.IDLE.value})
        return False

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        """Release the knowledge base and remove the local working directory.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.IDLE.value})
        tmp_path = os.path.join(EXPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def init_path(task_id: uuid.UUID) -> tuple:
        """Create a fresh working directory tree for the task.

        Returns:
            ``(source_path, target_path, doc_config_path, doc_download_path)``
            where ``target_path`` is the path of the zip file to be written
            and the other three are existing directories.
        """
        tmp_path = os.path.join(EXPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        os.makedirs(tmp_path)
        source_path = os.path.join(tmp_path, "source")
        # ``target_path`` is the zip FILE; creating a directory with that
        # name would make writing and uploading the archive fail.
        target_path = os.path.join(tmp_path, f"{task_id}.zip")
        doc_config_path = os.path.join(source_path, "doc_config")
        doc_download_path = os.path.join(source_path, "doc_download")
        os.mkdir(source_path)
        os.mkdir(doc_config_path)
        os.mkdir(doc_download_path)
        return (source_path, target_path, doc_config_path, doc_download_path)

    @staticmethod
    async def create_knowledge_base_yaml_config(source_path: str, kb_id: uuid.UUID) -> None:
        """Write ``kb_config.yaml`` with the knowledge base settings and doc types.

        Raises:
            ValueError: the knowledge base does not exist.
        """
        knowledge_base_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
        if knowledge_base_entity is None:
            raise ValueError(f"[ExportKnowledgeBaseWorker] 知识库不存在,知识库ID: {kb_id}")
        doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(kb_id)
        kb_dict = {
            "name": knowledge_base_entity.name,
            "tokenizer": knowledge_base_entity.tokenizer,
            "description": knowledge_base_entity.description,
            "embedding_model": knowledge_base_entity.embedding_model,
            "upload_count_limit": knowledge_base_entity.upload_count_limit,
            "upload_size_limit": knowledge_base_entity.upload_size_limit,
            "default_parse_method": knowledge_base_entity.default_parse_method,
            "default_chunk_size": knowledge_base_entity.default_chunk_size,
            # NOTE(review): ``id`` is dumped as-is; if it is a UUID object the
            # YAML gets a Python-specific tag. Verify against the importer.
            "doc_types": [
                {"id": doc_type_entity.id, "name": doc_type_entity.name}
                for doc_type_entity in doc_type_entities
            ]
        }
        yaml_path = os.path.join(source_path, "kb_config.yaml")
        with open(yaml_path, "w", encoding="utf-8", errors='ignore') as f:
            yaml.dump(kb_dict, f, allow_unicode=True)

    @staticmethod
    async def create_document_yaml_config(doc_config_path: str, kb_id: uuid.UUID) -> None:
        """Write one ``<doc_id>.yaml`` config per document of the knowledge base."""
        doc_entities = await DocumentManager.list_all_document_by_kb_id(kb_id)
        for doc_entity in doc_entities:
            doc_dict = {
                "name": doc_entity.name,
                "extension": doc_entity.extension,
                "size": doc_entity.size,
                "parse_method": doc_entity.parse_method,
                "chunk_size": doc_entity.chunk_size,
                "type_id": doc_entity.type_id,
                "enabled": doc_entity.enabled,
            }
            yaml_path = os.path.join(doc_config_path, f"{doc_entity.id}.yaml")
            with open(yaml_path, "w", encoding="utf-8", errors='ignore') as f:
                yaml.dump(doc_dict, f, allow_unicode=True)

    @staticmethod
    async def download_document_from_minio(doc_config_path: str, kb_id: uuid.UUID) -> None:
        """Download every document of the knowledge base from MinIO.

        Args:
            doc_config_path: Directory the files are saved into, each named
                after its document ID. ``run`` passes the download directory
                here despite the parameter's name.
            kb_id: Knowledge base whose documents are downloaded.
        """
        doc_entities = await DocumentManager.list_all_document_by_kb_id(kb_id)
        for doc_entity in doc_entities:
            local_path = os.path.join(doc_config_path, f"{doc_entity.id}")
            await MinIO.download_object(DOC_PATH_IN_MINIO, str(doc_entity.id), local_path)

    @staticmethod
    async def zip_config_and_document(source_path: str, target_path: str) -> None:
        """Zip the ``source_path`` directory into the file ``target_path``."""
        await ZipHandler.zip_dir(source_path, target_path)

    @staticmethod
    async def upload_zip_to_minio(target_path: str, task_id: uuid.UUID) -> None:
        """Upload the archive to MinIO under the task ID."""
        await MinIO.put_object(EXPORT_KB_PATH_IN_MINIO, str(task_id), target_path)

    @staticmethod
    async def run(task_id: uuid.UUID) -> None:
        """Execute the export and queue the final task status.

        Any failure is logged, reported, and turned into a ``FAILED`` status
        message; nothing is raised to the caller.
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                # Raise a real exception; raising the bare string would fail
                # with an unrelated TypeError.
                raise ValueError(f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}")
            await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.EXPORTING.value})
            current_stage = 0
            stage_cnt = 6
            source_path, target_path, doc_config_path, doc_download_path = await ExportKnowledgeBaseWorker.init_path(task_id)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "创建临时目录", current_stage, stage_cnt)
            await ExportKnowledgeBaseWorker.create_knowledge_base_yaml_config(source_path, task_entity.op_id)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "创建知识库yaml配置文件", current_stage, stage_cnt)
            await ExportKnowledgeBaseWorker.create_document_yaml_config(doc_config_path, task_entity.op_id)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "创建文档yaml配置文件", current_stage, stage_cnt)
            await ExportKnowledgeBaseWorker.download_document_from_minio(doc_download_path, task_entity.op_id)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "下载文档", current_stage, stage_cnt)
            await ExportKnowledgeBaseWorker.zip_config_and_document(source_path, target_path)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "压缩配置文件和文档", current_stage, stage_cnt)
            await ExportKnowledgeBaseWorker.upload_zip_to_minio(target_path, task_id)
            current_stage += 1
            await ExportKnowledgeBaseWorker.report(task_id, "上传压缩包到minio", current_stage, stage_cnt)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value))
        except Exception as e:
            err = f"[ExportKnowledgeBaseWorker] 运行任务失败,task_id: {task_id},错误信息: {e}"
            logging.exception(err)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value))
            await ExportKnowledgeBaseWorker.report(task_id, err, 0, 1)

    @staticmethod
    async def stop(task_id: uuid.UUID) -> uuid.UUID:
        """Release the knowledge base and remove the local working directory.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.IDLE.value})
        tmp_path = os.path.join(EXPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def delete(task_id) -> uuid.UUID:
        """Remove the archive of a cancelled or failed export from MinIO.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        # NOTE(review): the previous ``status == A or B`` test was always
        # true; this implements the condition it spelled out. Confirm whether
        # archives of successful exports should be removed as well.
        if task_entity.status in (TaskStatus.CANCLED.value, TaskStatus.FAILED.value):
            await MinIO.delete_object(EXPORT_KB_PATH_IN_MINIO, str(task_id))
        return task_id
class DocChunk(BaseModel):
    # Chunk texts of one document, used as context for QA generation.
    doc_id: uuid.UUID
    doc_name: str
    chunks: list[str]


class GenerateDataSetWorker(BaseWorker):
    """Worker that generates a QA dataset from document chunks with an LLM.

    For every chunk it asks the LLM for questions, then for an answer to each
    question, optionally lets the LLM score the pair, and stores the accepted
    pairs as ``QAEntity`` rows.
    """
    name = TaskType.DATASET_GENERATE.value

    @staticmethod
    async def init(dataset_id: uuid.UUID) -> uuid.UUID:
        """Create a generation task for the dataset.

        Returns:
            The new task ID, or ``None`` when the dataset does not exist.
        """
        dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id)
        if dataset_entity is None:
            err = f"[GenerateDataSetWorker] 数据集不存在,数据集ID: {dataset_id}"
            logging.error("%s", err)
            return None
        dataset_entity = await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"status": DataSetStatus.PENDING.value})
        task_entity = TaskEntity(
            team_id=dataset_entity.team_id,
            user_id=dataset_entity.author_id,
            op_id=dataset_entity.id,
            op_name=dataset_entity.name,
            type=TaskType.DATASET_GENERATE.value,
            retry=0,
            status=TaskStatus.PENDING.value)
        task_entity = await TaskManager.add_task(task_entity)
        return task_entity.id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        """Discard the results of a previous attempt and decide on a retry.

        Resets the dataset score to 0 and marks its QA pairs deleted.

        Returns:
            ``True`` when the retry limit is not reached and the dataset was
            put back to ``PENDING``; ``False`` otherwise (dataset set to
            ``IDLE``) or when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return False
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": 0})
        await QAManager.update_qa_by_dataset_id(task_entity.op_id, {"status": QAStatus.DELETED.value})
        if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
            await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.PENDING.value})
            return True
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        return False

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        """Set the dataset back to ``IDLE`` after a successful run.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        return task_id

    @staticmethod
    async def get_chunks(dataset_entity: DataSetEntity) -> list[DocChunk]:
        """Collect the chunk texts of every document linked to the dataset."""
        dataset_doc_entities = await DatasetManager.list_dataset_document_by_dataset_id(dataset_entity.id)
        doc_chunks = []
        for dataset_doc_entity in dataset_doc_entities:
            # NOTE(review): chunks are looked up by the link row's ``id`` and
            # the DocChunk is labelled with the DATASET's id/name. This looks
            # like it should use the linked document's id/name; confirm
            # against the DataSetDocEntity schema before changing it.
            chunk_entities = await ChunkManager.list_all_chunk_by_doc_id(dataset_doc_entity.id)
            chunks = [chunk_entity.text for chunk_entity in chunk_entities]
            doc_chunk = DocChunk(
                doc_id=dataset_entity.id,
                doc_name=dataset_entity.name,
                chunks=[]
            )
            doc_chunk.chunks = chunks
            doc_chunks.append(doc_chunk)
        return doc_chunks

    @staticmethod
    def _parse_answer(raw_answer: str) -> str:
        """Return the answer text, unwrapping it when the LLM sent a JSON string."""
        try:
            parsed = json.loads(raw_answer)
        except (TypeError, ValueError):
            return raw_answer
        return parsed if isinstance(parsed, str) else raw_answer

    @staticmethod
    async def _score_qa(chunk: str, question: str, answer: str, llm: LLM, score_template: str) -> float:
        """Ask the LLM to score one QA pair; raises ``ValueError`` on a non-numeric reply."""
        sys_call = score_template.format(
            content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4),
            question=TokenTool.get_k_tokens_words_from_content(question, llm.max_tokens//9),
            answer=TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens//9*4)
        )
        usr_call = '请输出分数'
        raw_score = await llm.nostream([], sys_call, usr_call)
        # Parse the number directly; model output must never be executed.
        return float(raw_score.strip())

    @staticmethod
    async def _generate_qa_for_chunk(
            chunk: str, qa_cnt: int, is_data_cleared: bool, llm: LLM, prompts: tuple) -> list[tuple]:
        """Generate up to *qa_cnt* accepted ``(question, answer, score)`` triples.

        Makes at most five rounds of LLM calls. A pair is accepted when its
        score is above 60; without data cleaning every pair scores 100.
        """
        q_template, answer_template, score_template = prompts
        accepted = []
        rounds_left = 5
        while len(accepted) < qa_cnt and rounds_left > 0:
            # Count the round up front so a failing call cannot loop forever.
            rounds_left -= 1
            try:
                sys_call = q_template.format(
                    k=qa_cnt - len(accepted),
                    content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens)
                )
                usr_call = '请输出问题的列表'
                questions = json.loads(await llm.nostream([], sys_call, usr_call))
            except Exception as e:
                err = f"[GenerateDataSetWorker] 生成问题失败,错误信息: {e}"
                logging.exception(err)
                continue
            if not isinstance(questions, list):
                logging.error("%s", f"[GenerateDataSetWorker] 生成问题失败,错误信息: 返回结果不是列表")
                continue
            for question in questions:
                if len(accepted) >= qa_cnt:
                    break
                if not isinstance(question, str):
                    continue
                try:
                    sys_call = answer_template.format(
                        content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//8*7),
                        question=TokenTool.get_k_tokens_words_from_content(question, llm.max_tokens//8)
                    )
                    usr_call = '请输出答案'
                    answer = GenerateDataSetWorker._parse_answer(
                        await llm.nostream([], sys_call, usr_call))
                except Exception as e:
                    err = f"[GenerateDataSetWorker] 生成答案失败,错误信息: {e}"
                    logging.exception(err)
                    continue
                try:
                    if is_data_cleared:
                        score = await GenerateDataSetWorker._score_qa(
                            chunk, question, answer, llm, score_template)
                    else:
                        score = 100
                except Exception as e:
                    err = f"[GenerateDataSetWorker] 计算分数失败,错误信息: {e}"
                    logging.exception(err)
                    continue
                if score > 60:
                    accepted.append((question, answer, score))
        return accepted

    @staticmethod
    async def generate_qa(dataset_entity: DataSetEntity, doc_chunks: list[DocChunk], llm: LLM) -> list[QAEntity]:
        """Generate QA pairs for the dataset and store its average score.

        ``dataset_entity.data_cnt`` pairs are spread evenly over all chunks;
        the first ``data_cnt % chunk_cnt`` chunks get one extra pair. When
        ``is_chunk_related`` is set, each chunk is extended with its
        neighbouring chunks as context. *doc_chunks* is shuffled in place.

        Returns:
            The generated (not yet persisted) QA entities; empty when there
            are no chunks.
        """
        chunk_cnt = sum(len(doc_chunk.chunks) for doc_chunk in doc_chunks)
        if chunk_cnt == 0:
            return []
        division, remainder = divmod(dataset_entity.data_cnt, chunk_cnt)
        random.shuffle(doc_chunks)
        with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f:
            prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) or {}
        prompts = (
            prompt_dict.get('GENREATE_QUESTION_FROM_CONTENT_PROMPT', ''),
            prompt_dict.get('GENERATE_ANSWER_FROM_QUESTION_AND_CONTENT_PROMPT', ''),
            prompt_dict.get('CAL_QA_SCORE_PROMPT', ''),
        )
        qa_entities = []
        dataset_score = 0
        index = 0
        for doc_chunk in doc_chunks:
            for i, chunk in enumerate(doc_chunk.chunks):
                qa_cnt = division + (1 if index < remainder else 0)
                index += 1
                if qa_cnt == 0:
                    # Fewer pairs requested than chunks exist: skip the rest.
                    continue
                if dataset_entity.is_chunk_related:
                    if i > 0:
                        chunk = doc_chunk.chunks[i-1] + chunk
                    if i < len(doc_chunk.chunks) - 1:
                        chunk = chunk + doc_chunk.chunks[i+1]
                scored_pairs = await GenerateDataSetWorker._generate_qa_for_chunk(
                    chunk, qa_cnt, dataset_entity.is_data_cleared, llm, prompts)
                for question, answer, score in scored_pairs:
                    qa_entities.append(QAEntity(
                        dataset_id=dataset_entity.id,
                        doc_id=doc_chunk.doc_id,
                        doc_name=doc_chunk.doc_name,
                        question=question,
                        answer=answer,
                        chunk=chunk,
                    ))
                    dataset_score += score
        if qa_entities:
            dataset_score = dataset_score / len(qa_entities)
        await DatasetManager.update_dataset_by_dataset_id(
            dataset_entity.id, {'score': dataset_score})
        return qa_entities

    @staticmethod
    async def add_qa_to_db(qa_entities: list[QAEntity]) -> None:
        """Persist the QA entities in batches of 1024.

        A failing batch is logged and skipped so the remaining batches are
        still stored.
        """
        batch_size = 1024
        for start in range(0, len(qa_entities), batch_size):
            try:
                await QAManager.add_qa(qa_entities[start:start+batch_size])
            except Exception as e:
                err = f"[GenerateDataSetWorker] 添加QA到数据库失败,错误信息: {e}"
                logging.exception(err)

    @staticmethod
    async def run(task_id: uuid.UUID) -> None:
        """Execute the generation and queue the final task status.

        Any failure is logged, reported, and turned into a ``FAILED`` status
        message; nothing is raised to the caller.
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                # Raise a real exception; raising the bare string would fail
                # with an unrelated TypeError.
                raise ValueError(f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}")
            llm = LLM(
                openai_api_key=config['OPENAI_API_KEY'],
                openai_api_base=config['OPENAI_API_BASE'],
                model_name=config['MODEL_NAME'],
                max_tokens=config['MAX_TOKENS'],
            )
            dataset_entity = await DatasetManager.get_dataset_by_dataset_id(task_entity.op_id)
            if dataset_entity is None:
                raise ValueError(f"[GenerateDataSetWorker] 数据集不存在,数据集ID: {task_entity.op_id}")
            await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.GENERATING.value})
            current_stage = 0
            stage_cnt = 3
            # ``get_chunks`` reads ``.id`` and ``.name``, so it needs the
            # entity itself rather than its ID.
            doc_chunks = await GenerateDataSetWorker.get_chunks(dataset_entity)
            current_stage += 1
            await GenerateDataSetWorker.report(task_id, "获取文档分块信息", current_stage, stage_cnt)
            qa_entities = await GenerateDataSetWorker.generate_qa(
                dataset_entity, doc_chunks, llm)
            current_stage += 1
            await GenerateDataSetWorker.report(task_id, "生成QA", current_stage, stage_cnt)
            await GenerateDataSetWorker.add_qa_to_db(qa_entities)
            current_stage += 1
            await GenerateDataSetWorker.report(task_id, "添加QA到数据库", current_stage, stage_cnt)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value))
        except Exception as e:
            err = f"[GenerateDataSetWorker] 任务失败,task_id: {task_id},错误信息: {e}"
            logging.exception(err)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value))
            await GenerateDataSetWorker.report(task_id, err, 0, 1)

    @staticmethod
    async def stop(task_id: uuid.UUID) -> uuid.UUID:
        """Release the dataset and discard the results of an unfinished run.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
        unfinished_statuses = (
            TaskStatus.PENDING.value,
            TaskStatus.RUNNING.value,
            TaskStatus.FAILED.value,
        )
        if task_entity.status in unfinished_statuses:
            await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": 0})
            await QAManager.update_qa_by_dataset_id(task_entity.op_id, {"status": QAStatus.DELETED.value})
        return task_id

    @staticmethod
    async def delete(task_id: uuid.UUID) -> uuid.UUID:
        """Confirm the task exists so the dispatcher can mark it deleted.

        Without this override ``BaseWorker.delete`` would resolve to itself
        through inheritance and recurse. Generation leaves no files or MinIO
        objects behind, so there is nothing else to clean up.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}"
            logging.error("%s", err)
            return None
        return task_id
@staticmethod
async def init(dataset_id: uuid.UUID) -> uuid.UUID:
    """Create an import task for the dataset.

    Returns:
        The new task ID, or ``None`` when the dataset does not exist.
    """
    dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id)
    if dataset_entity is None:
        err = f"[ImportDataSetWorker] 数据集不存在,数据集ID: {dataset_id}"
        # No exception is active here, so log with ``error``.
        logging.error("%s", err)
        return None
    dataset_entity = await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"status": DataSetStatus.PENDING.value})
    task_entity = TaskEntity(
        team_id=dataset_entity.team_id,
        user_id=dataset_entity.author_id,
        op_id=dataset_entity.id,
        op_name=dataset_entity.name,
        # The dispatcher selects the worker by task type, so the type must
        # equal this worker's ``name`` (DATASET_IMPORT), not KB_IMPORT.
        type=TaskType.DATASET_IMPORT.value,
        retry=0,
        status=TaskStatus.PENDING.value)
    task_entity = await TaskManager.add_task(task_entity)
    return task_entity.id

@staticmethod
async def reinit(task_id: uuid.UUID) -> bool:
    """Discard the results of a previous attempt and decide on a retry.

    Resets the dataset score to 0, marks its QA pairs deleted and removes
    the task's local working directory.

    Returns:
        ``True`` when the retry limit is not reached and the dataset was put
        back to ``PENDING``; ``False`` otherwise (dataset marked ``DELETED``)
        or when the task does not exist.
    """
    task_entity = await TaskManager.get_task_by_task_id(task_id)
    if task_entity is None:
        err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}"
        logging.error("%s", err)
        return False
    await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": 0})
    await QAManager.update_qa_by_dataset_id(task_entity.op_id, {"status": QAStatus.DELETED.value})
    tmp_path = os.path.join(IMPORT_DATASET_PATH_IN_OS, str(task_id))
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
        await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.PENDING.value})
        return True
    await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.DELETED.value})
    return False

@staticmethod
async def deinit(task_id: uuid.UUID) -> uuid.UUID:
    """Remove the local working directory and set the dataset to ``IDLE``.

    Returns:
        ``task_id``, or ``None`` when the task does not exist.
    """
    task_entity = await TaskManager.get_task_by_task_id(task_id)
    if task_entity is None:
        err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}"
        logging.error("%s", err)
        return None
    tmp_path = os.path.join(IMPORT_DATASET_PATH_IN_OS, str(task_id))
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value})
    return task_id
os.path.join(IMPORT_DATASET_PATH_IN_OS, str(task_id)) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + os.makedirs(tmp_path) + return tmp_path + + @staticmethod + async def download_file_from_minio(dataset_id: uuid.UUID, tmp_path: str) -> str: + '''从MinIO下载文件''' + file_path = os.path.join(tmp_path, str(dataset_id)) + if not os.path.exists(file_path): + await MinIO.download_object( + bucket_name=IMPORT_DATASET_PATH_IN_MINIO, + object_name=str(dataset_id), + file_path=file_path + ) + return file_path + + @staticmethod + async def load_qa_entity_from_file(dataset_id: uuid.UUID, file_path: str) -> list[QAEntity]: + '''从文件中加载QA实体''' + parser_result = None + parsers = { + 'json': JsonParser, + 'yaml': YamlParser, + 'xlsx': XlsxParser + } + extension = None + for parser_name, parser in parsers: + try: + parser_result = await parser.parser(file_path) + extension = parser_name + break + except Exception as e: + err = f"[GenerateDataSetWorker] 解析文件失败,文件路径: {file_path},错误信息: {e}" + logging.exception(err) + if parser_result is None: + err = f"[GenerateDataSetWorker] 解析文件失败,文件路径: {file_path},不支持的文件格式" + logging.exception(err) + raise err + qa_entities = [] + if extension == 'xlsx': + nodes = parser_result.nodes + ignore = True + for node in nodes: + if ignore: + ignore = False + continue + tmp_list = node.content + if len(tmp_list) < 3: + err = f"[GenerateDataSetWorker] qa对提取失败,文件路径: {file_path},qa对长度不足3" + logging.exception(err) + continue + question = tmp_list[0] + answer = tmp_list[1] + chunk = tmp_list[2] + qa_entity = QAEntity( + dataset_id=dataset_id, + doc_id=None, + doc_name='', + question=question, + answer=answer, + chunk=chunk, + ) + qa_entities.append(qa_entity) + elif extension == 'json' or extension == 'yaml': + nodes = parser_result.nodes + for node in nodes: + tmp_dict = node.content + if 'question' not in tmp_dict or 'answer' not in tmp_dict or 'chunk' not in tmp_dict: + err = f"[GenerateDataSetWorker] qa对提取失败,文件路径: {file_path},qa对格式不正确" + 
logging.exception(err) + continue + question = tmp_dict['question'] + answer = tmp_dict['answer'] + chunk = tmp_dict['chunk'] + qa_entity = QAEntity( + dataset_id=dataset_id, + doc_id=None, + doc_name='', + question=question, + answer=answer, + chunk=chunk, + ) + qa_entities.append(qa_entity) + qa_entities = qa_entities[:512] + await QAManager.add_qas(qa_entities) + await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"data_cnt": len(qa_entities)}) + return qa_entities + + @staticmethod + async def update_dataset_score(dataset_id: uuid.UUID, qa_entities: list[QAEntity], llm: LLM) -> None: + '''更新数据集分数''' + if not qa_entities: + return + databse_score = 0 + with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f: + prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) + cal_qa_score_prompt_template = prompt_dict.get('CAL_QA_SCORE_PROMPT', '') + for qa_entity in qa_entities: + chunk = qa_entity.chunk + question = qa_entity.question + answer = qa_entity.answer + sys_call = cal_qa_score_prompt_template.format( + content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4), + question=TokenTool.get_k_tokens_words_from_content(question, llm.max_tokens//9), + answer=TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens//9*4) + ) + usr_call = '请输出分数' + score = await llm.nostream([], sys_call, usr_call) + score = eval(score) + databse_score += score + databse_score /= len(qa_entities) + await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"score": databse_score}) + + @staticmethod + async def run(task_id: uuid.UUID) -> None: + '''运行任务''' + try: + task_entity = await TaskManager.get_task_by_task_id(task_id) + if task_entity is None: + err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}" + logging.exception(err) + raise err + llm = LLM( + openai_api_key=config['OPENAI_API_KEY'], + openai_api_base=config['OPENAI_API_BASE'], + model_name=config['MODEL_NAME'], + max_tokens=config['MAX_TOKENS'], + ) + dataset_entity = await 
DatasetManager.get_dataset_by_dataset_id(task_entity.op_id) + if dataset_entity is None: + err = f"[ImportDataSetWorker] 数据集不存在,数据集ID: {task_entity.op_id}" + logging.exception(err) + raise err + await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.IMPORTING.value}) + current_stage = 0 + stage_cnt = 3 + tmp_path = await ImportDataSetWorker.init_path(task_id) + current_stage += 1 + await ImportDataSetWorker.report(task_id, "初始化路径", current_stage, stage_cnt) + await ImportDataSetWorker.download_file_from_minio(dataset_entity.id, tmp_path) + current_stage += 1 + await ImportDataSetWorker.report(task_id, "下载文件", current_stage, stage_cnt) + qa_entities = await ImportDataSetWorker.load_qa_entity_from_file(dataset_entity.id, tmp_path) + current_stage += 1 + await ImportDataSetWorker.report(task_id, "加载qa实体", current_stage, stage_cnt) + await ImportDataSetWorker.update_dataset_score(dataset_entity.id, qa_entities, llm) + current_stage += 1 + await ImportDataSetWorker.report(task_id, "更新数据集分数", current_stage, stage_cnt) + await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value)) + except Exception as e: + err = f"[ImportDataSetWorker] 任务失败,task_id: {task_id},错误信息: {e}" + logging.exception(err) + await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value)) + await ImportDataSetWorker.report(task_id, "任务失败", 0, 1) + + @staticmethod + async def stop(task_id: uuid.UUID) -> uuid.UUID: + '''停止任务''' + task_entity = await TaskManager.get_task_by_task_id(task_id) + if task_entity is None: + err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}" + logging.exception(err) + return None + await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value}) + tmp_path = os.path.join(IMPORT_DATASET_PATH_IN_OS, str(task_id)) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + return task_id + + @staticmethod + async def delete(task_id: uuid.UUID) -> 
uuid.UUID: + '''删除任务''' + task_entity = await TaskManager.get_task_by_task_id(task_id) + if task_entity is None: + err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}" + logging.exception(err) + return None + if task_entity.status == TaskStatus.CANCLED or TaskStatus.FAILED.value: + await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.DELETED.value}) + await MinIO.delete_object( + bucket_name=IMPORT_DATASET_PATH_IN_MINIO, + object_name=str(task_entity.op_id) + ) + return task_id diff --git a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py new file mode 100644 index 0000000..7790a44 --- /dev/null +++ b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py @@ -0,0 +1,254 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +import uuid +import os +import shutil +import yaml +from data_chain.apps.base.zip_handler import ZipHandler +from data_chain.config.config import config +from data_chain.logger.logger import logger as logging +from data_chain.apps.base.task.worker.base_worker import BaseWorker +from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, DocumentStatus +from data_chain.entities.common import DEFAULt_DOC_TYPE_ID, IMPORT_KB_PATH_IN_OS, DOC_PATH_IN_MINIO, IMPORT_KB_PATH_IN_MINIO +from data_chain.manager.task_manager import TaskManager +from data_chain.manager.knowledge_manager import KnowledgeBaseManager +from data_chain.manager.document_type_manager import DocumentTypeManager +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.task_queue_mamanger import TaskQueueManager +from data_chain.stores.database.database import TaskEntity, DocumentEntity, DocumentTypeEntity +from data_chain.stores.minio.minio import MinIO +from data_chain.stores.mongodb.mongodb import Task + + +class ImportKnowledgeBaseWorker(BaseWorker): + """ + 
class ImportKnowledgeBaseWorker(BaseWorker):
    """Worker that imports a knowledge base from a zip archive in MinIO.

    The archive is downloaded and unpacked into a per-task temporary
    directory, its document types and documents are registered, the document
    files are uploaded to MinIO, and a parse task is created per document.
    """
    name = TaskType.KB_IMPORT.value

    @staticmethod
    async def init(kb_id: uuid.UUID) -> uuid.UUID:
        """Create the import task for a knowledge base.

        Returns:
            Id of the created task, or ``None`` when the knowledge base is
            missing.
        """
        knowledge_base_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
        if knowledge_base_entity is None:
            err = f"[ImportKnowledgeBaseWorker] 知识库不存在,知识库ID: {kb_id}"
            logging.exception(err)
            return None
        knowledge_base_entity = await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
            kb_id, {"status": KnowledgeBaseStatus.PENDING.value})
        task_entity = TaskEntity(
            team_id=knowledge_base_entity.team_id,
            user_id=knowledge_base_entity.author_id,
            op_id=knowledge_base_entity.id,
            op_name=knowledge_base_entity.name,
            type=TaskType.KB_IMPORT.value,
            retry=0,
            status=TaskStatus.PENDING.value)
        task_entity = await TaskManager.add_task(task_entity)
        return task_entity.id

    @staticmethod
    async def reinit(task_id: uuid.UUID) -> bool:
        """Reset a task before a retry.

        Returns:
            ``True`` if the task may be retried (knowledge base set to
            PENDING). ``False`` if the task is missing, or if the retry limit
            is reached; in the latter case the uploaded archive is removed
            and the knowledge base is marked DELETED.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.exception(err)
            return False
        tmp_path = os.path.join(IMPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
            await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
                task_entity.op_id, {"status": KnowledgeBaseStatus.PENDING.value})
            return True
        # The archive is stored under the knowledge base id in the MinIO
        # import bucket (see download_zip_from_minio); the original passed
        # the local path constant and the task id.
        await MinIO.delete_object(IMPORT_KB_PATH_IN_MINIO, str(task_entity.op_id))
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
            task_entity.op_id, {"status": KnowledgeBaseStatus.DELETED.value})
        return False

    @staticmethod
    async def deinit(task_id: uuid.UUID) -> uuid.UUID:
        """Set the knowledge base back to IDLE and remove temporary files.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.exception(err)
            return None
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
            task_entity.op_id, {"status": KnowledgeBaseStatus.IDLE.value})
        tmp_path = os.path.join(IMPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def init_path(task_id: uuid.UUID) -> tuple:
        """Create the working directories for the task.

        Returns:
            ``(source_path, target_path, doc_config_path, doc_download_path)``
            where ``source_path`` is the file path the zip will be downloaded
            to and the other three are existing directories.
        """
        tmp_path = os.path.join(IMPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        # source_path is a FILE path: it must not be created as a directory,
        # otherwise the download cannot write the archive there.
        source_path = os.path.join(tmp_path, f"{task_id}.zip")
        target_path = os.path.join(tmp_path, "source")
        doc_config_path = os.path.join(target_path, "doc_config")
        doc_download_path = os.path.join(target_path, "doc_download")
        # makedirs also creates tmp_path and target_path as parents.
        os.makedirs(doc_config_path)
        os.makedirs(doc_download_path)
        return (source_path, target_path, doc_config_path, doc_download_path)

    @staticmethod
    async def download_zip_from_minio(source_path: str, kb_id: uuid.UUID) -> None:
        """Download the archive of ``kb_id`` from MinIO to ``source_path``."""
        await MinIO.download_object(IMPORT_KB_PATH_IN_MINIO, str(kb_id), source_path)

    @staticmethod
    async def unzip_config_and_document(source_path: str, target_path: str) -> None:
        """Extract the archive at ``source_path`` into ``target_path``."""
        await ZipHandler.unzip_file(source_path, target_path)

    @staticmethod
    async def add_doc_types_to_kb(kb_id: uuid.UUID, source_path: str) -> dict[uuid.UUID, uuid.UUID]:
        """Register the document types listed in ``kb_config.yaml``.

        Args:
            kb_id: Target knowledge base.
            source_path: Directory that contains ``kb_config.yaml``.

        Returns:
            Mapping from the type id in the archive to the newly created
            type id. Entries without an ``id`` are created but not mapped.
        """
        yaml_path = os.path.join(source_path, "kb_config.yaml")
        with open(yaml_path, "r", encoding="utf-8") as f:
            kb_config = yaml.load(f, Loader=yaml.SafeLoader)
        doc_types_old_id_map_to_new_id = {}
        for doc_type_dict in kb_config.get("doc_types", []):
            doc_type_entity = DocumentTypeEntity(
                kb_id=kb_id,
                name=doc_type_dict.get("name")
            )
            doc_type_entity = await DocumentTypeManager.add_document_type(doc_type_entity)
            old_id = doc_type_dict.get('id')
            if doc_type_entity and old_id is not None:
                doc_types_old_id_map_to_new_id[old_id] = doc_type_entity.id
        # The original built the map but never returned it.
        return doc_types_old_id_map_to_new_id

    @staticmethod
    async def add_docs_to_kb(kb_id: uuid.UUID, doc_config_path: str, doc_download_path: str,
                             doc_types_old_id_map_to_new_id: dict[uuid.UUID, uuid.UUID]) -> dict[uuid.UUID, uuid.UUID]:
        """Register every document that has both a config file and a payload.

        A document is skipped when no file with the same name as its config
        exists in ``doc_download_path``. Failures are logged per document and
        do not abort the import.

        Returns:
            Mapping from the document id in the archive to the new id.
        """
        kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
        doc_old_id_map_to_new_id = {}
        type_id_map = doc_types_old_id_map_to_new_id or {}
        for doc_config_name in os.listdir(doc_config_path):
            # Separate variable: reassigning the ``doc_config_path`` parameter
            # (as the original did) corrupts the path from the second file on.
            doc_config_file = os.path.join(doc_config_path, doc_config_name)
            try:
                doc_path = os.path.join(doc_download_path, doc_config_name)
                if not os.path.exists(doc_path):
                    continue
                with open(doc_config_file, "r", encoding="utf-8") as f:
                    doc_config = yaml.load(f, Loader=yaml.SafeLoader)
                doc_type_id = type_id_map.get(doc_config.get("type_id"), DEFAULt_DOC_TYPE_ID)
                document_entity = DocumentEntity(
                    team_id=kb_entity.team_id,
                    kb_id=kb_entity.id,
                    author_id=kb_entity.author_id,
                    author_name=kb_entity.author_name,
                    name=doc_config.get("name", ''),
                    extension=doc_config.get("extension", ''),
                    size=doc_config.get("size", ''),
                    parse_method=doc_config.get("parse_method", kb_entity.default_parse_method),
                    chunk_size=doc_config.get("chunk_size", kb_entity.default_chunk_size),
                    type_id=doc_type_id,
                    enabled=doc_config.get("enabled", True),
                    status=DocumentStatus.IDLE.value,
                )
                document_entity = await DocumentManager.add_document(document_entity)
                if document_entity:
                    doc_old_id_map_to_new_id[doc_config.get('id', '')] = document_entity.id
            except Exception as e:
                err = f"[ImportKnowledgeBaseWorker] 添加文档失败,文档配置文件: {doc_config_file},错误信息: {e}"
                logging.exception(err)
                continue
        await KnowledgeBaseManager.update_doc_cnt_and_doc_size(kb_id)
        # The original never returned the map, so the upload step got None.
        return doc_old_id_map_to_new_id

    @staticmethod
    async def upload_document_to_minio(
            doc_download_path: str, doc_old_id_map_to_new_id: dict[uuid.UUID, uuid.UUID]) -> None:
        """Upload each unpacked document under its new document id.

        Only files whose name is a key of ``doc_old_id_map_to_new_id`` are
        uploaded. Failures are logged per file.
        """
        id_map = doc_old_id_map_to_new_id or {}
        for doc_name in os.listdir(doc_download_path):
            doc_path = os.path.join(doc_download_path, doc_name)
            try:
                if not os.path.exists(doc_path):
                    continue
                if doc_name in id_map:
                    # Object names are strings everywhere else in this file.
                    await MinIO.put_object(DOC_PATH_IN_MINIO, str(id_map[doc_name]), doc_path)
            except Exception as e:
                err = f"[ImportKnowledgeBaseWorker] 上传文档失败,文档路径: {doc_path},错误信息: {e}"
                logging.exception(err)
                continue

    @staticmethod
    async def init_doc_parse_tasks(kb_id: uuid.UUID) -> None:
        """Create a document-parse task for every document of the knowledge base."""
        document_entities = await DocumentManager.list_all_document_by_kb_id(kb_id)
        for document_entity in document_entities:
            await BaseWorker.init(TaskType.DOC_PARSE.value, document_entity.id)

    @staticmethod
    async def run(task_id: uuid.UUID) -> None:
        """Execute the seven import stages, reporting progress after each.

        Any failure is logged and the task is queued with FAILED status;
        nothing is raised to the caller.
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
                logging.exception(err)
                raise RuntimeError(err)
            await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
                task_entity.op_id, {"status": KnowledgeBaseStatus.IMPORTING.value})
            current_stage = 0
            stage_cnt = 7
            source_path, target_path, doc_config_path, doc_download_path = \
                await ImportKnowledgeBaseWorker.init_path(task_id)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "初始化路径", current_stage, stage_cnt)
            kb_id = task_entity.op_id
            await ImportKnowledgeBaseWorker.download_zip_from_minio(source_path, kb_id)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "下载zip文件", current_stage, stage_cnt)
            await ImportKnowledgeBaseWorker.unzip_config_and_document(source_path, target_path)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "解压zip文件", current_stage, stage_cnt)
            doc_types_old_id_map_to_new_id = await ImportKnowledgeBaseWorker.add_doc_types_to_kb(
                kb_id, doc_config_path)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "添加文档类型到知识库", current_stage, stage_cnt)
            doc_old_id_map_to_new_id = await ImportKnowledgeBaseWorker.add_docs_to_kb(
                kb_id, doc_config_path, doc_download_path, doc_types_old_id_map_to_new_id)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "添加文档到知识库", current_stage, stage_cnt)
            await ImportKnowledgeBaseWorker.upload_document_to_minio(doc_download_path, doc_old_id_map_to_new_id)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "上传文档到minio", current_stage, stage_cnt)
            await ImportKnowledgeBaseWorker.init_doc_parse_tasks(kb_id)
            current_stage += 1
            await ImportKnowledgeBaseWorker.report(task_id, "初始化文档解析任务", current_stage, stage_cnt)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value))
        except Exception as e:
            err = f"[ImportKnowledgeBaseWorker] 任务失败,task_id: {task_id},错误信息: {e}"
            logging.exception(err)
            await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value))
            await ImportKnowledgeBaseWorker.report(task_id, err, 0, 1)

    @staticmethod
    async def stop(task_id: uuid.UUID) -> uuid.UUID:
        """Stop the task: set the knowledge base to IDLE, remove temp files.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.exception(err)
            return None
        await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
            task_entity.op_id, {"status": KnowledgeBaseStatus.IDLE.value})
        tmp_path = os.path.join(IMPORT_KB_PATH_IN_OS, str(task_id))
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        return task_id

    @staticmethod
    async def delete(task_id) -> uuid.UUID:
        """Delete the task's artefacts.

        The knowledge base is marked DELETED only when the import was
        cancelled or failed. The uploaded archive is removed from MinIO in
        every case.

        Returns:
            ``task_id``, or ``None`` when the task does not exist.
        """
        task_entity = await TaskManager.get_task_by_task_id(task_id)
        if task_entity is None:
            err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}"
            logging.exception(err)
            return None
        # The original ``status == A or B.value`` was always true.
        if task_entity.status in (TaskStatus.CANCLED.value, TaskStatus.FAILED.value):
            await KnowledgeBaseManager.update_knowledge_base_by_kb_id(
                task_entity.op_id, {"status": KnowledgeBaseStatus.DELETED.value})
        # NOTE(review): original indentation is lost in this patch view; the
        # removal is kept unconditional, as it effectively was before. The
        # bucket is the MinIO import bucket, not the local path constant.
        await MinIO.delete_object(IMPORT_KB_PATH_IN_MINIO, str(task_entity.op_id))
        return task_id
--git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py new file mode 100644 index 0000000..e789e96 --- /dev/null +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -0,0 +1,506 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +import uuid +import os +import shutil +import yaml +import random +import io +import numpy as np +from PIL import Image +from data_chain.parser.tools.ocr_tool import OcrTool +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.parser.tools.image_tool import ImageTool +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.apps.base.zip_handler import ZipHandler +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.llm.llm import LLM +from data_chain.embedding.embedding import Embedding +from data_chain.config.config import config +from data_chain.logger.logger import logger as logging +from data_chain.apps.base.task.worker.base_worker import BaseWorker +from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, ParseMethod, DocumentStatus, ChunkStatus, ImageStatus, DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.entities.common import DEFAULt_DOC_TYPE_ID, DOC_PATH_IN_MINIO, DOC_PATH_IN_OS, IMAGE_PATH_IN_MINIO +from data_chain.manager.task_manager import TaskManager +from data_chain.manager.knowledge_manager import KnowledgeBaseManager +from data_chain.manager.document_type_manager import DocumentTypeManager +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.manager.image_manager import ImageManager +from data_chain.manager.task_queue_mamanger import TaskQueueManager +from data_chain.stores.database.database import TaskEntity, DocumentEntity, DocumentTypeEntity, ChunkEntity, ImageEntity +from data_chain.stores.minio.minio 
@staticmethod
async def init(doc_id: uuid.UUID) -> uuid.UUID:
    """Create a parse task for a document and discard earlier parse output.

    The document is set to PENDING with an empty abstract, and its existing
    images and chunks are marked deleted.

    Returns:
        Id of the created task, or ``None`` when the document is missing.
    """
    doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
    if doc_entity is None:
        err = f"[ParseDocumentWorker] 文档不存在,doc_id: {doc_id}"
        logging.exception(err)
        # ``raise None`` is itself a TypeError; the sibling workers return
        # None for a missing target.
        return None
    await DocumentManager.update_document_by_doc_id(
        doc_id,
        # Key was misspelled "abstarct"; update_doc_abstract writes "abstract".
        {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None})
    await ImageManager.update_images_by_doc_id(doc_id, {"status": ImageStatus.DELETED.value})
    await ChunkManager.update_chunk_by_doc_id(doc_id, {"status": ChunkStatus.DELETED.value})
    task_entity = TaskEntity(
        team_id=doc_entity.team_id,
        user_id=doc_entity.author_id,
        op_id=doc_entity.id,
        op_name=doc_entity.name,  # was the misspelled keyword ``op_namne``
        type=TaskType.DOC_PARSE.value,
        retry=0,
        status=TaskStatus.PENDING.value)
    task_entity = await TaskManager.add_task(task_entity)
    return task_entity.id


@staticmethod
async def reinit(task_id: uuid.UUID) -> bool:
    """Reset a parse task before a retry.

    Clears the abstract, marks images and chunks deleted and removes the
    temporary directory.

    Returns:
        ``True`` if the task may be retried (document set to PENDING),
        ``False`` if the task is missing or the retry limit is reached
        (document set to IDLE in the latter case).
    """
    task_entity = await TaskManager.get_task_by_task_id(task_id)
    if task_entity is None:
        err = f"[ParseDocumentWorker] 任务不存在,task_id: {task_id}"
        logging.exception(err)
        return False
    doc_id = task_entity.op_id
    await DocumentManager.update_document_by_doc_id(doc_id, {"abstract": "", "abstract_vector": None})
    await ImageManager.update_images_by_doc_id(doc_id, {"status": ImageStatus.DELETED.value})
    await ChunkManager.update_chunk_by_doc_id(doc_id, {"status": ChunkStatus.DELETED.value})
    tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id))
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']:
        await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.PENDING.value})
        return True
    await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.IDLE.value})
    return False


@staticmethod
async def deinit(task_id: uuid.UUID) -> uuid.UUID:
    """Set the document back to IDLE and remove temporary files.

    Returns:
        ``task_id``, or ``None`` when the task does not exist.
    """
    task_entity = await TaskManager.get_task_by_task_id(task_id)
    if task_entity is None:
        err = f"[ParseDocumentWorker] 任务不存在,task_id: {task_id}"
        logging.exception(err)
        return None
    await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.IDLE.value})
    tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id))
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    return task_id


@staticmethod
async def init_path(task_id: uuid.UUID) -> tuple:
    """Create an empty working directory with an ``images`` sub-directory.

    Returns:
        ``(tmp_path, image_path)``.
    """
    tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id))
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    image_path = os.path.join(tmp_path, "images")
    os.makedirs(tmp_path)
    os.makedirs(image_path)
    return tmp_path, image_path


@staticmethod
async def download_doc_from_minio(doc_id: uuid.UUID, tmp_path: str) -> str:
    """Download a document to ``<tmp_path>/<doc_id>.<extension>``.

    Returns:
        Local path of the downloaded file.

    Raises:
        RuntimeError: If the document does not exist.
    """
    doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
    if doc_entity is None:
        err = f"[ParseDocumentWorker] 文档不存在,doc_id: {doc_id}"
        logging.exception(err)
        raise RuntimeError(err)
    # No trailing comma here: the original turned the path into a 1-tuple.
    file_path = os.path.join(tmp_path, str(doc_id) + '.' + doc_entity.extension)
    await MinIO.download_object(
        bucket_name=DOC_PATH_IN_MINIO,
        object_name=str(doc_entity.id),
        file_path=file_path,
    )
    return file_path


@staticmethod
async def parse_doc(file_path: str) -> ParseResult:
    """Parse the file with the parser selected by its extension.

    The extension passed to ``BaseParser.parser`` is what
    ``os.path.splitext`` returns, i.e. it includes the leading dot.
    """
    extension = os.path.splitext(file_path)[-1]
    return await BaseParser.parser(extension, file_path)
@staticmethod
async def handle_parse_result(parse_result: ParseResult, doc_entity: DocumentEntity, llm: LLM = None) -> None:
    """Normalise parsed nodes in place according to the parse method.

    * Image nodes are dropped unless the method is OCR or enhanced.
    * For the QA method every node becomes
      ``"question: <q>\\nanswer: <a>"`` with the question as text feature.
      List content is read as ``[question, answer]``, dict content by the
      ``question``/``answer`` keys, anything else yields empty strings.
    * Otherwise list/dict content is stringified; TEXT and LINK nodes use
      their content as text feature; CODE nodes use an LLM abstract when an
      LLM is given and fall back to top keywords.

    Args:
        parse_result: Result whose ``nodes`` are modified.
        doc_entity: Document providing ``parse_method``.
        llm: Optional model used to summarise code nodes.
    """
    parse_method = doc_entity.parse_method
    # Compare against ``.value`` throughout; the original mixed ``.value``
    # and bare enum members within the same condition.
    if parse_method not in (ParseMethod.OCR.value, ParseMethod.EHANCED.value):
        parse_result.nodes = [node for node in parse_result.nodes if node.type != ChunkType.IMAGE]

    if parse_method == ParseMethod.QA.value:
        for node in parse_result.nodes:
            question = ''
            answer = ''
            try:
                raw = node.content
                if isinstance(raw, list):
                    if len(raw) > 0:
                        question = str(raw[0])
                    if len(raw) > 1:
                        answer = str(raw[1])
                elif isinstance(raw, dict):
                    # str(): non-string values would break the concatenation
                    # below, which runs outside this try block.
                    question = str(raw.get('question', ''))
                    answer = str(raw.get('answer', ''))
            except Exception as e:
                question = ''
                answer = ''
                err = f"[ParseDocumentWorker] 解析问题和答案失败,doc_id: {doc_entity.id}, error: {e}"
                logging.exception(err)
            node.text_feature = question
            node.content = 'question: ' + question + '\n' + 'answer: ' + answer
        return

    for node in parse_result.nodes:
        if isinstance(node.content, (list, dict)):
            node.content = str(node.content)
            node.text_feature = str(node.content)
        elif isinstance(node.content, str):
            if node.type in (ChunkType.TEXT, ChunkType.LINK):
                node.text_feature = node.content
            elif node.type == ChunkType.CODE:
                if llm is not None:
                    node.text_feature = await TokenTool.get_abstract_by_llm(node.content, llm)
                if node.text_feature is None:
                    node.text_feature = TokenTool.get_top_k_keywords(node.content)
@staticmethod
async def upload_parse_image_to_minio_and_postgres(
        parse_result: ParseResult, doc_entity: DocumentEntity, image_path: str) -> None:
    """Store every image node in MinIO and register it in the database.

    Each image blob is written to ``image_path`` and uploaded under the
    node id. An ``ImageEntity`` is recorded only for images that uploaded
    successfully. Entities are inserted in batches of 1024; failures are
    logged and do not stop the remaining work.
    """
    image_entities = []
    for node in parse_result.nodes:
        if node.type != ChunkType.IMAGE:
            continue
        try:
            image_blob = node.content
            extension = ImageTool.get_image_type(image_blob)
            image_file_path = os.path.join(image_path, str(node.id) + '.' + extension)
            with open(image_file_path, 'wb') as f:
                # Write the raw bytes; the original wrote a PIL Image
                # object, which raises TypeError for every image.
                f.write(image_blob)
            await MinIO.upload_object(
                bucket_name=IMAGE_PATH_IN_MINIO,
                object_name=str(node.id),
                file_path=image_file_path
            )
            # NOTE(review): the object is stored under node.id while the
            # entity gets a fresh id - confirm readers look images up by
            # chunk_id.
            image_entity = ImageEntity(
                id=uuid.uuid4(),
                team_id=doc_entity.team_id,
                doc_id=doc_entity.id,
                chunk_id=node.id,
                extension=extension,
            )
        except Exception as e:
            err = f"[ParseDocumentWorker] 上传解析图片到minio失败,doc_id: {doc_entity.id}, image_path: {image_path}, error: {e}"
            logging.exception(err)
            continue
        # Appended exactly once, and only after a successful upload.
        image_entities.append(image_entity)

    batch_size = 1024
    for start in range(0, len(image_entities), batch_size):
        try:
            await ImageManager.add_images(image_entities[start:start + batch_size])
        except Exception as e:
            err = f"[ParseDocumentWorker] 上传解析图片到postgres失败,doc_id: {doc_entity.id}, image_path: {image_path}, error: {e}"
            logging.exception(err)


@staticmethod
async def ocr_from_parse_image(parse_result: ParseResult, llm: LLM = None) -> None:
    """Replace the content of every image node with its OCR text, in place.

    The content of linked non-image nodes is passed to the OCR tool as
    context. A node whose OCR fails is logged and left unchanged.
    """
    for node in parse_result.nodes:
        if node.type != ChunkType.IMAGE:
            continue
        try:
            image = Image.open(io.BytesIO(node.content))
            img_np = np.array(image)
            image_related_text = ''.join(
                str(related_node.content)
                for related_node in node.link_nodes
                if related_node.type != ChunkType.IMAGE
            )
            node.content = OcrTool.image_to_text(img_np, image_related_text, llm)
            node.text_feature = node.content
        except Exception as e:
            # getattr: no ``doc_id`` is set on the ParseNode instances built
            # in this file, and an AttributeError here would escape the handler.
            err = f"[ParseDocumentWorker] OCR失败,doc_id: {getattr(node, 'doc_id', None)}, error: {e}"
            logging.exception(err)
            continue
@staticmethod
async def merge_and_split_text(parse_result: ParseResult, doc_entity: DocumentEntity) -> None:
    """Re-chunk text nodes so each is close to ``doc_entity.chunk_size`` tokens.

    Skipped for the QA parse method and for tree-shaped results. Otherwise:

    1. Adjacent TEXT nodes are merged while the merged size stays within
       ``chunk_size``.
    2. Each TEXT node is split on sentence boundaries into new nodes of at
       most ``chunk_size`` tokens (a single longer sentence stays whole).

    Non-text nodes are kept unchanged and in order. ``parse_result.nodes``
    is replaced.
    """
    # NOTE(review): the topology is read from ``doc_entity`` here but from
    # ``parse_result.parse_topology_type`` in push_up_words_feature -
    # confirm DocumentEntity really has ``parse_relut_topology``.
    if doc_entity.parse_method == ParseMethod.QA or doc_entity.parse_relut_topology == DocParseRelutTopology.TREE:
        return

    chunk_size = doc_entity.chunk_size

    # Pass 1: merge neighbouring text nodes that fit into one chunk.
    merged_nodes = []
    for node in parse_result.nodes:
        previous = merged_nodes[-1] if merged_nodes else None
        can_merge = (
            node.type == ChunkType.TEXT
            and previous is not None
            and previous.type == ChunkType.TEXT
            and TokenTool.get_tokens(previous.content) + TokenTool.get_tokens(node.content) <= chunk_size
        )
        if can_merge:
            previous.content += node.content
        else:
            merged_nodes.append(node)

    def make_text_node(text: str, lv) -> ParseNode:
        """Build a fresh general-topology text node for one chunk."""
        return ParseNode(
            id=uuid.uuid4(),
            lv=lv,
            parse_topology_type=ChunkParseTopology.GERNERAL,
            text_feature=text,
            content=text,
            type=ChunkType.TEXT,
            link_nodes=[]
        )

    # Pass 2: split text nodes on sentence boundaries.
    split_nodes = []
    for node in merged_nodes:
        if node.type != ChunkType.TEXT:
            # The original dropped every non-text node at this point.
            split_nodes.append(node)
            continue
        buffer = ''
        for sentence in TokenTool.content_to_sentences(node.content):
            if len(buffer) == 0:
                buffer = sentence
            elif TokenTool.get_tokens(buffer) + TokenTool.get_tokens(sentence) > chunk_size:
                split_nodes.append(make_text_node(buffer, node.lv))
                # Start the next chunk with the sentence that did not fit;
                # the original neither reset the buffer nor kept it.
                buffer = sentence
            else:
                buffer += sentence
        if len(buffer) > 0:
            split_nodes.append(make_text_node(buffer, node.lv))
    parse_result.nodes = split_nodes
@staticmethod
async def push_up_words_feature(parse_result: ParseResult, llm: LLM = None) -> None:
    """Give every titled node of a tree-shaped result a text feature.

    Only runs when ``parse_result.parse_topology_type`` is TREE. The tree is
    walked depth-first (children first). For each node:

    * ``pre_id`` is set to the parent's id (roots are left untouched);
    * a node with a non-empty title uses it as text feature and content;
    * a node with an empty title gets one derived from the headlines (title,
      else first sentence) of its parent and children - via the LLM when
      given, otherwise the first sentence of that text.

    Nodes whose ``title`` is ``None`` only get ``pre_id``.
    """
    def first_sentence(text) -> str:
        sentences = TokenTool.get_leave_setences_from_content_len(text, 1)
        return sentences[0] if sentences else ''

    def headline(node: ParseNode) -> str:
        """Return the node's title, or the first sentence of its content."""
        return node.title if node.title else first_sentence(node.content)

    async def dfs(node: ParseNode, parent_node: ParseNode, visited: set) -> None:
        if id(node) in visited:  # guards against cyclic links
            return
        visited.add(id(node))
        # The original tested ``is None`` and then dereferenced the parent.
        if parent_node is not None:
            node.pre_id = parent_node.id
        for child_node in node.link_nodes:
            await dfs(child_node, node, visited)
        if node.title is None:
            return
        if len(node.title) > 0:
            node.text_feature = node.title
            node.content = node.text_feature
            return

        use_llm = llm is not None
        # The section labels are only part of the LLM prompt.
        content = '父标题\n' if use_llm else ''
        if parent_node is not None:
            line = headline(parent_node)
            if line:
                content += line + '\n'
        # A distinct loop variable: the original rebound ``node`` here and
        # then wrote the result onto the last child.
        for index, child_node in enumerate(node.link_nodes):
            if use_llm:
                content += '子标题' + str(index) + '\n'
            line = headline(child_node)
            if line:
                content += line + '\n'
        title = ''
        if use_llm:
            title = await TokenTool.get_title_by_llm(content, llm)
        if not title:
            title = first_sentence(content)
        node.text_feature = title
        node.content = node.text_feature

    if parse_result.parse_topology_type != DocParseRelutTopology.TREE:
        return
    nodes = parse_result.nodes
    # ``nodes`` is a list, so start from every node nobody links to.
    # NOTE(review): assumes link_nodes point from parent to child - confirm.
    child_ids = {id(child) for node in nodes for child in node.link_nodes}
    roots = [node for node in nodes if id(node) not in child_ids]
    if not roots and nodes:
        roots = nodes[:1]
    visited = set()
    for root in roots:
        await dfs(root, None, visited)


@staticmethod
async def update_doc_abstract(doc_id: uuid.UUID, parse_result: ParseResult, llm: LLM = None) -> str:
    """Compute the document abstract, embed it and store both.

    The abstract is an LLM summary of the concatenated node contents when
    ``llm`` is given, otherwise their first sentence (or ``''``).

    Returns:
        The abstract text.
    """
    # Nodes without content are skipped so a None content cannot break the
    # concatenation.
    full_text = "".join(str(node.content) for node in parse_result.nodes if node.content)
    if llm is not None:
        abstract = await TokenTool.get_abstract_by_llm(full_text, llm)
    else:
        sentences = TokenTool.get_leave_setences_from_content_len(full_text, 1)
        abstract = sentences[0] if sentences else ''
    abstract_vector = await Embedding.vectorize_embedding(abstract)
    await DocumentManager.update_document_by_doc_id(
        doc_id,
        {
            "abstract": abstract,
            "abstract_vector": abstract_vector
        }
    )
    return abstract


@staticmethod
async def embedding_chunk(parse_result: ParseResult) -> None:
    """Set ``vector`` on every node to the embedding of its text feature."""
    for node in parse_result.nodes:
        node.vector = await Embedding.vectorize_embedding(node.text_feature)
'''添加解析结果到数据库''' + chunk_entities = [] + global_offset = 0 + local_offset = 0 + for node in parse_result.nodes: + if not node.content: + continue + chunk_entity = ChunkEntity( + id=node.id, + team_id=doc_entity.team_id, + kb_id=doc_entity.kb_id, + doc_id=doc_entity.id, + text=node.content, + text_vector=node.vector, + tokens=TokenTool.get_tokens(node.content), + type=node.type, + pre_id_in_parse_topology=node.pre_id, + parse_topology_type=node.parse_topology_type, + global_offset=global_offset, + local_offset=local_offset, + enabled=True, + status=ChunkStatus.EXISTED.value + ) + chunk_entities.append(chunk_entity) + if global_offset and node[global_offset].type != node[global_offset-1].type: + local_offset = 0 + local_offset += 1 + global_offset += 1 + index = 0 + while index < len(chunk_entities): + try: + await ChunkManager.add_chunks(chunk_entities[index:index+1024]) + except Exception as e: + err = f"[ParseDocumentWorker] 添加解析结果到数据库失败,doc_id: {doc_entity.id}, error: {e}" + logging.exception(err) + index += 1024 + + @staticmethod + async def run(task_id: uuid.UUID) -> None: + '''运行任务''' + task_entity = await TaskManager.get_task_by_task_id(task_id) + if task_entity is None: + err = f"[ParseDocumentWorker] 任务不存在,task_id: {task_id}" + logging.exception(err) + raise err + doc_entity = await DocumentManager.get_document_by_doc_id(task_entity.op_id) + if doc_entity is None: + err = f"[ParseDocumentWorker] 文档不存在,doc_id: {task_entity.op_id}" + logging.exception(err) + raise err + await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.RUNNING.value}) + try: + if doc_entity.parse_method == ParseMethod.EHANCED: + llm = LLM( + openai_api_key=config['OPENAI_API_KEY'], + openai_api_base=config['OPENAI_API_BASE'], + model_name=config['MODEL_NAME'], + max_tokens=config['MAX_TOKENS'], + ) + else: + llm = None + tmp_path, image_path = await ParseDocumentWorker.init_path(task_id) + current_stage = 0 + stage_cnt = 10 + await 
ParseDocumentWorker.download_doc_from_minio(task_entity.op_id, tmp_path) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '下载文档', current_stage, stage_cnt) + file_path = os.path.join(tmp_path, str(task_entity.op_id)+'.'+doc_entity.extension) + parse_result = await ParseDocumentWorker.parse_doc(file_path) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '解析文档', current_stage, stage_cnt) + await ParseDocumentWorker.handle_parse_result(parse_result, doc_entity, llm) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '处理解析结果', current_stage, stage_cnt) + await ParseDocumentWorker.upload_parse_image_to_minio_and_postgres(parse_result, doc_entity, image_path) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '上传解析图片', current_stage, stage_cnt) + await ParseDocumentWorker.ocr_from_parse_image(parse_result, llm) + current_stage += 1 + await ParseDocumentWorker.report(task_id, 'OCR图片', current_stage, stage_cnt) + await ParseDocumentWorker.merge_and_split_text(parse_result, doc_entity) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '合并和拆分文本', current_stage, stage_cnt) + await ParseDocumentWorker.push_up_words_feature(parse_result, llm) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '推送上层词特征', current_stage, stage_cnt) + await ParseDocumentWorker.embedding_chunk(parse_result) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '嵌入chunk', current_stage, stage_cnt) + await ParseDocumentWorker.update_doc_abstract(doc_entity.id, parse_result, llm) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '更新文档摘要', current_stage, stage_cnt) + await ParseDocumentWorker.add_parse_result_to_db(parse_result, doc_entity) + current_stage += 1 + await ParseDocumentWorker.report(task_id, '添加解析结果到数据库', current_stage, stage_cnt) + await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value)) + except Exception as e: + err = f"[DocParseWorker] 
任务失败,task_id: {task_id},错误信息: {e}" + logging.exception(err) + await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.FAILED.value}) + await ParseDocumentWorker.report(task_id, err, 0, 1) + return None + + @staticmethod + async def stop(task_id: uuid.UUID) -> uuid.UUID: + '''停止任务''' + task_entity = await TaskManager.get_task_by_task_id(task_id) + if task_entity is None: + err = f"[ParseDocumentWorker] 任务不存在,task_id: {task_id}" + logging.exception(err) + return None + await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.IDLE.value}) + if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: + await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"abstarct": "", "abstract_vector": None}) + await ImageManager.update_images_by_doc_id(task_entity.op_id, {"status": ImageStatus.DELETED.value}) + await ChunkManager.update_chunk_by_doc_id(task_entity.op_id, {"status": ChunkStatus.DELETED.value}) + tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id)) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + return task_id diff --git a/data_chain/apps/base/document/zip_handler.py b/data_chain/apps/base/zip_handler.py similarity index 57% rename from data_chain/apps/base/document/zip_handler.py rename to data_chain/apps/base/zip_handler.py index 2dab910..41ca1ee 100644 --- a/data_chain/apps/base/document/zip_handler.py +++ b/data_chain/apps/base/zip_handler.py @@ -1,40 +1,45 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
@staticmethod
def check_zip_file(zip_file_path: str, max_file_num: int = 4096, max_file_size: int = 10 * 1024 * 1024 * 1024) -> None:
    """Validate an archive's member count, total uncompressed size and readability.

    Args:
        zip_file_path: Path of the archive to check.
        max_file_num: Largest number of members allowed.
        max_file_size: Largest total uncompressed size allowed, in bytes.

    Raises:
        ValueError: If the archive holds too many members or is too large.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
        Exception: Any other error raised while opening the archive or one
            of its members is logged and re-raised unchanged.
    """
    limit_error = None
    try:
        # Context managers close the archive and every member handle, which
        # the original left open.
        with zipfile.ZipFile(zip_file_path) as archive:
            members = archive.infolist()
            if len(members) > max_file_num:
                limit_error = f"压缩文件{zip_file_path}的数量超过了上限"
            elif sum(member.file_size for member in members) > max_file_size:
                limit_error = f"压缩文件{zip_file_path}的尺寸超过了上限"
            else:
                # Opening each member verifies that it can actually be read.
                for member in members:
                    with archive.open(member):
                        pass
    except zipfile.BadZipFile:
        err = f"文件 {zip_file_path} 可能不是有效的ZIP文件."
        logging.error("[ZipHandler] %s", err)
        # Fix: the original re-raised an unbound name `e` here.
        raise
    except Exception as e:
        err = f"处理文件 {zip_file_path} 时出错: {e}"
        logging.error("[ZipHandler] %s", err)
        raise
    if limit_error is not None:
        logging.error("[ZipHandler] %s", limit_error)
        # Fix: the original did `raise err` with a str, which is a TypeError.
        raise ValueError(limit_error)
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
"""HTTP routes for accuracy testing: list, create, run, update, delete and report download."""
from fastapi import APIRouter, Depends, Query, Body, File, UploadFile
from fastapi.responses import StreamingResponse, HTMLResponse, Response
from typing import Annotated
# `urllib.parse` must be imported explicitly; `import urllib` alone does not
# guarantee that the submodule is loaded.
import urllib.parse
from uuid import UUID
from httpx import AsyncClient
from data_chain.entities.request_data import (
    ListTestingRequest,
    CreateTestingRequest,
    UpdateTestingRequest
)
from data_chain.entities.response_data import (
    ListTestingResponse,
    ListTestCaseResponse,
    CreateTestingResponsing,
    RunTestingResponse,
    UpdateTestingResponse,
    DeleteTestingResponse
)
from data_chain.apps.service.knwoledge_base_service import KnowledgeBaseService
from data_chain.apps.service.dataset_service import DataSetService
from data_chain.apps.service.acc_testing_service import TestingService
from data_chain.apps.service.session_service import get_user_sub, verify_user
from data_chain.apps.service.router_service import get_route_info
router = APIRouter(prefix='/testing', tags=['Testing'])


@router.post('/list', response_model=ListTestingResponse, dependencies=[Depends(verify_user)])
async def list_testing_by_kb_id(
    user_sub: Annotated[str, Depends(get_user_sub)],
    action: Annotated[str, Depends(get_route_info)],
    req: Annotated[ListTestingRequest, Body()],
):
    """List the testings of a knowledge base the user may access."""
    if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)):
        raise Exception("用户没有权限访问该知识库的测试")
    list_testing_msg = await TestingService.list_testing_by_kb_id(req)
    return ListTestingResponse(result=list_testing_msg)


@router.post('/testcase', response_model=ListTestCaseResponse,
             dependencies=[Depends(verify_user)])
async def list_testcase_by_testing_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        testing_id: Annotated[UUID, Query(alias="testingId")]):
    """List the test cases that belong to one testing."""
    if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)):
        raise Exception("用户没有权限访问该测试的测试用例")
    testing_testcase = await TestingService.list_testcase_by_testing_id(testing_id)
    return ListTestCaseResponse(result=testing_testcase)


@router.get('/download', dependencies=[Depends(verify_user)])
async def download_testing_report_by_testing_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        testing_id: Annotated[UUID, Query(alias="testingId")]):
    """Fetch the testing report archive and relay it as a zip attachment.

    Raises:
        Exception: If the user lacks permission or the upstream download does
            not answer with status 200.
    """
    if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)):
        raise Exception("用户没有权限访问该测试的测试报告")
    report_link_url = await TestingService.generate_testing_report_download_url(testing_id)
    document_name, extension = str(testing_id)+".zip", "zip"
    async with AsyncClient() as async_client:
        # `get` reads the whole body before returning, so the payload is
        # already in memory when it is relayed below.
        response = await async_client.get(report_link_url)
        if response.status_code == 200:
            content_disposition = f"attachment; filename={urllib.parse.quote(document_name.encode('utf-8'))}"

            async def stream_generator():
                async for chunk in response.aiter_bytes(chunk_size=8192):
                    yield chunk

            return StreamingResponse(stream_generator(), headers={
                "Content-Disposition": content_disposition,
                # Fix: the upstream header may be missing (this sent the
                # literal "None") or describe an encoded body; use the size
                # of the bytes actually relayed.
                "Content-Length": str(len(response.content))
            }, media_type="application/" + extension)
        else:
            raise Exception(f"下载测试报告失败,状态码: {response.status_code}")


@router.post(
    '', response_model=CreateTestingResponsing, dependencies=[Depends(verify_user)])
async def create_testing(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        req: Annotated[CreateTestingRequest, Body()]):
    """Create a testing on a dataset and return the service's result."""
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, req.dataset_id, action)):
        raise Exception("用户没有权限访问该数据集的测试")
    task_id = await TestingService.create_testing(req)
    return CreateTestingResponsing(result=task_id)


@router.post('/run', response_model=RunTestingResponse,
             dependencies=[Depends(verify_user)])
async def run_testing_by_testing_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        testing_id: Annotated[UUID, Query(alias="testingId")],
        run: Annotated[bool, Query()]):
    """Forward the ``run`` flag for a testing to the service."""
    if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)):
        raise Exception("用户没有权限访问该测试的测试用例")
    task_id = await TestingService.run_testing_by_testing_id(testing_id, run)
    return RunTestingResponse(result=task_id)


@router.put('', response_model=UpdateTestingResponse,
            dependencies=[Depends(verify_user)])
async def update_testing_by_testing_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        testing_id: Annotated[UUID, Query(alias="testingId")],
        req: Annotated[UpdateTestingRequest, Body(...)]):
    """Update one testing."""
    if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)):
        raise Exception("用户没有权限访问该测试的测试用例")
    # NOTE(review): the service result is not passed to the response, unlike
    # the other endpoints here - confirm whether `result=` is expected.
    await TestingService.update_testing_by_testing_id(testing_id, req)
    return UpdateTestingResponse()


@router.delete('', response_model=DeleteTestingResponse,
               dependencies=[Depends(verify_user)])
async def delete_testing_by_testing_ids(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        testing_ids: Annotated[list[UUID], Body(alias="testingIds")]):
    """Delete several testings after checking permission on every one of them."""
    for testing_id in testing_ids:
        if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)):
            raise Exception("用户没有权限访问该测试的测试用例")
    testing_ids = await TestingService.delete_testing_by_testing_ids(testing_ids)
    return DeleteTestingResponse(result=testing_ids)
index 794afb0..e378f7b 100644 --- a/data_chain/apps/router/chunk.py +++ b/data_chain/apps/router/chunk.py @@ -1,79 +1,42 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from typing import List -import tiktoken -from fastapi import APIRouter, Depends, status - - -from data_chain.models.service import ChunkDTO -from data_chain.models.api import Page, BaseResponse, ListChunkRequest, SwitchChunkRequest,GetChunkRequest -from data_chain.exceptions.err_code import ErrorCode -from data_chain.exceptions.exception import DocumentException -from data_chain.apps.service.chunk_service import _validate_chunk_belong_to_user, list_chunk, switch_chunk,get_similar_chunks,get_similar_full_text, get_keywords_from_chunk -from data_chain.apps.service.document_service import _validate_doucument_belong_to_user -from data_chain.apps.service.user_service import verify_csrf_token, get_user_id, verify_user - -router = APIRouter(prefix='/chunk', tags=['Corpus']) - - -@router.post('/list', response_model=BaseResponse[Page[ChunkDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def list(req: ListChunkRequest, user_id=Depends(get_user_id)): - try: - await _validate_doucument_belong_to_user(user_id, req.document_id) - params = dict(req) - chunk_list, total = await list_chunk(params, req.page_number, req.page_size) - chunk_page = Page(page_number=req.page_number, page_size=req.page_size, - total=total, - data_list=chunk_list) - return BaseResponse(data=chunk_page) - except Exception as e: - return BaseResponse(retcode=ErrorCode.CREATE_CHUNK_ERROR, data=str(e.args[0])) - - -@router.post('/switch', response_model=BaseResponse[str], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def switch(req: SwitchChunkRequest, user_id=Depends(get_user_id)): - try: - for id in req.ids: - await _validate_chunk_belong_to_user(user_id, id) - for id in req.ids: - await switch_chunk(id, req.enabled) - return 
BaseResponse(data='success') - except Exception as e: - return BaseResponse(retcode=ErrorCode.SWITCH_CHUNK_ERROR, data=str(e.args[0])) - -@router.post('/get', response_model=BaseResponse[List[str]]) -async def get(req:GetChunkRequest): - try: - content = req.content - kb_sn=req.kb_sn - topk=req.topk - retrieval_mode=req.retrieval_mode - enc = tiktoken.encoding_for_model("gpt-4") - str_len_keywords_len_ratio_pair_list=[(30,1),(60,0.75),(120,0.55),(240,0.35),(1000,0.1)] - content_len=len(enc.encode(content)) - ratio=0 - for str_len,keywords_len_ratio in str_len_keywords_len_ratio_pair_list: - if content_len<=str_len: - ratio=keywords_len_ratio - break - if ratio==0: - keywords_cnt=100 - else: - keywords_cnt=int(content_len*ratio) - if len(enc.encode(content)) > 100: - keywords=await get_keywords_from_chunk(content,keywords_cnt) - content='' - for keyword in keywords: - content+=keyword+' ' - if retrieval_mode=='chunk': - chunk_list=await get_similar_chunks(content=content,kb_id=kb_sn,topk=topk,devided_by_document_id=False) - elif retrieval_mode=='full_text': - chunk_list=await get_similar_full_text(content=content,kb_id=kb_sn,topk=topk) - else: - chunk_list=[] - return BaseResponse(data=chunk_list) - except Exception as e: - return BaseResponse(retcode=status.HTTP_500_INTERNAL_SERVER_ERROR, data=str(e.args[0])) \ No newline at end of file +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
from data_chain.apps.service.session_service import get_user_sub, verify_user
from fastapi import APIRouter, Depends, Query, Body
from typing import Annotated
from uuid import UUID
from data_chain.entities.request_data import (
    ListChunkRequest,
    UpdateChunkRequest
)

from data_chain.entities.response_data import (
    ListChunkMsg,
    ListChunkResponse,
    UpdateChunkResponse
)
from data_chain.apps.service.router_service import get_route_info
from data_chain.apps.service.document_service import DocumentService
from data_chain.apps.service.chunk_service import ChunkService
router = APIRouter(prefix='/chunk', tags=['Chunk'])


@router.post('/list', response_model=ListChunkResponse, dependencies=[Depends(verify_user)])
async def list_chunks_by_document_id(
    user_sub: Annotated[str, Depends(get_user_sub)],
    action: Annotated[str, Depends(get_route_info)],
    req: Annotated[ListChunkRequest, Body()],
):
    """List the chunks of a document once the user's permission is confirmed."""
    permitted = await DocumentService.validate_user_action_to_document(user_sub, req.doc_id, action)
    if not permitted:
        raise Exception("用户没有权限访问该文档的分片")
    return ListChunkResponse(result=await ChunkService.list_chunks_by_document_id(req))


@router.put('', response_model=UpdateChunkResponse, dependencies=[Depends(verify_user)])
async def update_chunk_by_id(user_sub: Annotated[str, Depends(get_user_sub)],
                             action: Annotated[str, Depends(get_route_info)],
                             chunk_id: Annotated[UUID, Query(alias="chunkId")],
                             req: UpdateChunkRequest):
    """Update one chunk and answer with its id."""
    permitted = await ChunkService.validate_user_action_to_chunk(user_sub, chunk_id, action)
    if not permitted:
        raise Exception("用户没有权限访问该文档的分片")
    await ChunkService.update_chunk_by_id(chunk_id, req)
    return UpdateChunkResponse(result=chunk_id)
from fastapi import APIRouter, Depends, Query, Body, File, UploadFile
from fastapi.responses import StreamingResponse, HTMLResponse, Response
from httpx import AsyncClient
from typing import Annotated
# `urllib.parse` must be imported explicitly; `import urllib` alone does not
# guarantee that the submodule is loaded.
import urllib.parse
from uuid import UUID
from data_chain.entities.request_data import (
    ListDatasetRequest,
    ListDataInDatasetRequest,
    CreateDatasetRequest,
    UpdateDatasetRequest,
    UpdateDataRequest,
)

from data_chain.entities.response_data import (
    ListDatasetResponse,
    ListDataInDatasetResponse,
    IsDatasetHaveTestingResponse,
    CreateDatasetResponse,
    ImportDatasetResponse,
    ExportDatasetResponse,
    GenerateDatasetResponse,
    UpdateDatasetResponse,
    UpdateDataResponse,
    DeleteDatasetResponse,
    DeleteDataResponse
)
from data_chain.apps.service.knwoledge_base_service import KnowledgeBaseService
from data_chain.apps.service.dataset_service import DataSetService
from data_chain.apps.service.task_service import TaskService
from data_chain.apps.service.session_service import get_user_sub, verify_user
from data_chain.apps.service.router_service import get_route_info
router = APIRouter(prefix='/dataset', tags=['Dataset'])


@router.post('/list', response_model=ListDatasetResponse, dependencies=[Depends(verify_user)])
async def list_dataset_by_kb_id(
    user_sub: Annotated[str, Depends(get_user_sub)],
    action: Annotated[str, Depends(get_route_info)],
    req: Annotated[ListDatasetRequest, Body()],
):
    """List the datasets of a knowledge base the user may access."""
    if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)):
        raise Exception("用户没有权限访问该知识库的数据集")
    list_dataset_msg = await DataSetService.list_dataset(req)
    return ListDatasetResponse(result=list_dataset_msg)


@router.post('/data', response_model=ListDataInDatasetResponse, dependencies=[Depends(verify_user)])
async def list_data_in_dataset(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        req: Annotated[ListDataInDatasetRequest, Body()]):
    """List the data entries stored in one dataset."""
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, req.dataset_id, action)):
        raise Exception("用户没有权限访问该数据集的数据")
    list_data_in_dataset_msg = await DataSetService.list_data_in_dataset(req)
    return ListDataInDatasetResponse(result=list_data_in_dataset_msg)


@router.get('/testing/exist', response_model=IsDatasetHaveTestingResponse, dependencies=[Depends(verify_user)])
async def is_dataset_have_testing(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        dataset_id: Annotated[UUID, Query(alias="datasetId")]):
    """Report whether a dataset has a testing attached."""
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, dataset_id, action)):
        raise Exception("用户没有权限访问该数据集的数据")
    is_dataset_have_testing_response = await DataSetService.is_dataset_have_testing(dataset_id)
    return IsDatasetHaveTestingResponse(result=is_dataset_have_testing_response)


@router.get('/download', dependencies=[Depends(verify_user)])
async def download_dataset_by_task_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        task_id: Annotated[UUID, Query(alias="taskId")]):
    """Fetch the dataset archive produced by a task and relay it as a zip attachment.

    Raises:
        Exception: If the user lacks permission or the upstream download does
            not answer with status 200.
    """
    if not (await TaskService.validate_user_action_to_task(user_sub, task_id, action)):
        raise Exception("用户没有权限访问该任务的数据集")
    dataset_link_url = await DataSetService.generate_dataset_download_url(task_id)
    document_name, extension = str(task_id)+".zip", "zip"
    async with AsyncClient() as async_client:
        # `get` reads the whole body before returning, so the payload is
        # already in memory when it is relayed below.
        response = await async_client.get(dataset_link_url)
        if response.status_code == 200:
            content_disposition = f"attachment; filename={urllib.parse.quote(document_name.encode('utf-8'))}"

            async def stream_generator():
                async for chunk in response.aiter_bytes(chunk_size=8192):
                    yield chunk

            return StreamingResponse(stream_generator(), headers={
                "Content-Disposition": content_disposition,
                # Fix: the upstream header may be missing (this sent the
                # literal "None") or describe an encoded body; use the size
                # of the bytes actually relayed.
                "Content-Length": str(len(response.content))
            }, media_type="application/" + extension)
        else:
            raise Exception(f"下载数据集失败,状态码: {response.status_code}")


@router.post('', response_model=CreateDatasetResponse, dependencies=[Depends(verify_user)])
async def create_dataset(
    user_sub: Annotated[str, Depends(get_user_sub)],
    action: Annotated[str, Depends(get_route_info)],
    req: Annotated[CreateDatasetRequest, Body()]
):
    """Create a dataset in a knowledge base and return the service's result."""
    if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)):
        raise Exception("用户没有权限访问该知识库的数据集")
    task_id = await DataSetService.create_dataset(req)
    return CreateDatasetResponse(result=task_id)


@router.post('/import', response_model=ImportDatasetResponse, dependencies=[Depends(verify_user)])
async def import_dataset(user_sub: Annotated[str, Depends(get_user_sub)],
                         kb_id: Annotated[UUID, Query(alias="kbId")],
                         dataset_packages: list[UploadFile] = File(...)):
    """Import uploaded dataset packages into a knowledge base."""
    # NOTE(review): unlike most endpoints in this module, no route `action`
    # is passed to the permission check - confirm this is intentional.
    if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id)):
        raise Exception("用户没有权限在该知识库导入数据集")
    dataset_import_task_ids = await DataSetService.import_dataset(user_sub, kb_id, dataset_packages)
    return ImportDatasetResponse(result=dataset_import_task_ids)


@router.post('/export', response_model=ExportDatasetResponse, dependencies=[Depends(verify_user)])
async def export_dataset_by_dataset_ids(
        user_sub: Annotated[str, Depends(get_user_sub)],
        dataset_ids: Annotated[list[UUID], Query(alias="datasetIds")]):
    """Export several datasets after checking permission on every one of them."""
    for dataset_id in dataset_ids:
        # NOTE(review): no route `action` is passed here either - confirm.
        if not (await DataSetService.validate_user_action_to_dataset(user_sub, dataset_id)):
            raise Exception("用户没有权限访问该数据集的数据")
    dataset_export_task_ids = await DataSetService.export_dataset(dataset_ids)
    return ExportDatasetResponse(result=dataset_export_task_ids)


@router.post('/generate', response_model=GenerateDatasetResponse, dependencies=[Depends(verify_user)])
async def generate_dataset_by_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        dataset_id: Annotated[UUID, Query(alias="datasetId")],
        generate: Annotated[bool, Query()]):
    """Forward the ``generate`` flag for a dataset to the service."""
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, dataset_id, action)):
        raise Exception("用户没有权限访问该数据集")
    await DataSetService.generate_dataset_by_id(dataset_id, generate)
    return GenerateDatasetResponse()


@router.put('', response_model=UpdateDatasetResponse, dependencies=[Depends(verify_user)])
async def update_dataset_by_dataset_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        database_id: Annotated[UUID, Query(alias="databaseId")],
        req: Annotated[UpdateDatasetRequest, Body(...)]):
    """Update one dataset (addressed by the ``databaseId`` query parameter)."""
    # NOTE(review): no route `action` is passed to the permission check - confirm.
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id)):
        raise Exception("用户没有权限访问该数据集")
    await DataSetService.update_dataset_by_dataset_id(database_id, req)
    return UpdateDatasetResponse()


@router.put('/data', response_model=UpdateDataResponse, dependencies=[Depends(verify_user)])
async def update_data_by_dataset_id(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        data_id: Annotated[UUID, Query(alias="dataId")],
        req: Annotated[UpdateDataRequest, Body(...)]):
    """Update one data entry of a dataset."""
    # NOTE(review): a data id is handed to a dataset-level permission check -
    # verify that the service resolves it correctly.
    if not (await DataSetService.validate_user_action_to_dataset(user_sub, data_id, action)):
        raise Exception("用户没有权限访问该数据集的数据")
    await DataSetService.update_data(data_id, req)
    return UpdateDataResponse()


@router.delete('', response_model=DeleteDatasetResponse, dependencies=[Depends(verify_user)])
async def delete_dataset_by_dataset_ids(
        user_sub: Annotated[str, Depends(get_user_sub)],
        database_ids: Annotated[list[UUID], Body(alias="databaseId")]):
    """Delete several datasets after checking permission on every one of them."""
    for database_id in database_ids:
        # NOTE(review): no route `action` is passed to the permission check - confirm.
        if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id)):
            raise Exception("用户没有权限访问该数据集")
    dataset_ids = await DataSetService.delete_dataset_by_dataset_ids(database_ids)
    return DeleteDatasetResponse(result=dataset_ids)


@router.delete('/data', response_model=DeleteDataResponse, dependencies=[Depends(verify_user)])
async def delete_data_by_data_ids(
        user_sub: Annotated[str, Depends(get_user_sub)],
        action: Annotated[str, Depends(get_route_info)],
        data_ids: Annotated[list[UUID], Body(alias="dataIds")]):
    """Delete several data entries after checking permission on every one of them."""
    for data_id in data_ids:
        # NOTE(review): a data id is handed to a dataset-level permission
        # check - verify that the service resolves it correctly.
        if not (await DataSetService.validate_user_action_to_dataset(user_sub, data_id, action)):
            raise Exception("用户没有权限访问该数据集的数据")
    await DataSetService.delete_data_by_data_ids(data_ids)
    return DeleteDataResponse()
get_temporary_document_parse_status, \ - get_related_document +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile +from fastapi.responses import StreamingResponse, HTMLResponse, Response +from typing import Annotated +import urllib +from uuid import UUID from httpx import AsyncClient -from fastapi import Depends -from fastapi import APIRouter, File, UploadFile -from fastapi.responses import StreamingResponse - +from typing import Annotated +from uuid import UUID +from data_chain.entities.request_data import ( + ListDocumentRequest, + UpdateDocumentRequest +) + +from data_chain.entities.response_data import ( + ListDocumentMsg, + ListDocumentResponse, + GetDocumentReportResponse, + UploadDocumentResponse, + ParseDocumentResponse, + UpdateDocumentResponse, + DeleteDocumentResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info +from data_chain.apps.service.knwoledge_base_service import KnowledgeBaseService +from data_chain.apps.service.document_service import DocumentService router = APIRouter(prefix='/doc', tags=['Document']) -@router.post('/list', response_model=BaseResponse[Page[DocumentDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def list(req: ListDocumentRequest, user_id=Depends(get_user_id)): - try: - await _validate_knowledge_base_belong_to_user(user_id, req.kb_id) - params = dict(req) - page_number = req.page_number - page_size = req.page_size - document_list_tuple = await list_documents_by_knowledgebase_id(params, page_number, page_size) - document_page = Page(page_number=req.page_number, page_size=req.page_size, - total=document_list_tuple[1], - data_list=document_list_tuple[0]) - return BaseResponse(data=document_page) - except Exception as e: - return BaseResponse(retcode=ErrorCode.LIST_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/update', 
response_model=BaseResponse[DocumentDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def update(req: UpdateDocumentRequest, user_id=Depends(get_user_id)): - try: - await _validate_doucument_belong_to_user(user_id, req.id) - tmp_dict = dict(req) - document = await update_document(tmp_dict) - return BaseResponse(data=document) - except Exception as e: - return BaseResponse(retcode=ErrorCode.RENAME_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/run', response_model=BaseResponse[List[DocumentDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def run(reqs: RunDocumentRequest, user_id=Depends(get_user_id)): - try: - run = reqs.run - ids = reqs.ids - document_dto_list = [] - for req_id in ids: - await _validate_doucument_belong_to_user(user_id, req_id) - document = await run_document(dict(id=req_id, run=run)) - document_dto_list.append(document) - return BaseResponse(data=document_dto_list) - except Exception as e: - return BaseResponse(retcode=ErrorCode.RUN_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/switch', response_model=BaseResponse[DocumentDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def switch(req: SwitchDocumentRequest, user_id=Depends(get_user_id)): - try: - await _validate_doucument_belong_to_user(user_id, req.id) - document = await switch_document(req.id, req.enabled) - return BaseResponse(data=document) - except Exception as e: - return BaseResponse(retcode=ErrorCode.SWITCH_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/rm', response_model=BaseResponse[int], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def rm(req: DeleteDocumentRequest, user_id=Depends(get_user_id)): - try: - for id in req.ids: - await _validate_doucument_belong_to_user(user_id, id) - deleted_cnt = await delete_document(req.ids) - return BaseResponse(data=deleted_cnt) - except Exception as e: 
- return BaseResponse(retcode=ErrorCode.DELETE_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/upload', response_model=BaseResponse[List[str]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def upload(kb_id: str, files: List[UploadFile] = File(...), user_id=Depends(get_user_id)): - MAX_FILES = 128 - MAX_SIZE = 50 * 1024 * 1024 - MAX_TOTAL_SIZE = 500 * 1024 * 1024 - if len(files) > MAX_FILES: - raise HTTPException(status_code=400, detail="Too many files. Maximum allowed is 50.") - - total_size = 0 - for file in files: - if file.size > MAX_SIZE: - raise HTTPException(status_code=400, detail="File size exceeds the limit (25MB).") - total_size += file.size - - if total_size > MAX_TOTAL_SIZE: - raise HTTPException(status_code=400, detail="Total size of all files exceeds the limit (500MB).") - try: - await _validate_knowledge_base_belong_to_user(user_id, kb_id) - res = await submit_upload_document_task(user_id, kb_id, files) - return BaseResponse(data=res) - except Exception as e: - return BaseResponse(retcode=ErrorCode.UPLOAD_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.get('/download', response_model=BaseResponse[Dict], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def download(id: uuid.UUID, user_id=Depends(get_user_id)): - try: - await _validate_doucument_belong_to_user(user_id, id) - document_link_url = await generate_document_download_link(id) - document_name, extension = await get_file_name_and_extension(id) - async with AsyncClient() as async_client: - response = await async_client.get(document_link_url) - if response.status_code == 200: - content_disposition = f"attachment; filename={urllib.parse.quote(document_name.encode('utf-8'))}" - - async def stream_generator(): - async for chunk in response.aiter_bytes(chunk_size=8192): - yield chunk - - return StreamingResponse(stream_generator(), headers={ - "Content-Disposition": content_disposition, - "Content-Length": 
str(response.headers.get('content-length')) - }, media_type="application/" + extension) - else: - return BaseResponse( - retcode=ErrorCode.EXPORT_KNOWLEDGE_BASE_ERROR, retmsg="Failed to retrieve the file.", data=None) - except Exception as e: - return BaseResponse(retcode=ErrorCode.DOWNLOAD_DOCUMENT_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/temporary/related', response_model=BaseResponse[List[uuid.UUID]]) -async def related_temporary_doc(req: RelatedTemporaryDocumenRequest): - try: - results = await get_related_document(req.content, req.top_k, req.document_ids, req.kb_sn) - return BaseResponse(data=results) - except Exception as e: - return BaseResponse(retcode=status.HTTP_500_INTERNAL_SERVER_ERROR, retmsg=str(e), data=None) - - -@router.post('/temporary/parser', response_model=BaseResponse[List[uuid.UUID]]) -async def parser_temporary_doc(req: ParserTemporaryDocumenRequest): - try: - temporary_document_list = [] - for i in range(len(req.document_list)): - tmp_dict = dict(req.document_list[i]) - if tmp_dict['type'] == 'application/pdf': - tmp_dict['type'] = '.pdf' - elif tmp_dict['type'] == 'text/html': - tmp_dict['type'] = '.html' - elif tmp_dict['type'] == 'text/plain': - tmp_dict['type'] = '.txt' - elif tmp_dict['type'] == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': - tmp_dict['type'] = '.xlsx' - elif tmp_dict['type'] == 'text/x-markdown': - tmp_dict['type'] = '.md' - elif tmp_dict['type'] == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': - tmp_dict['type'] = '.docx' - elif tmp_dict['type'] == 'application/msword': - tmp_dict['type'] = '.doc' - elif tmp_dict['type'] == 'application/vnd.openxmlformats-officedocument.presentationml.presentation': - tmp_dict['type'] = '.pptx' - temporary_document_list.append(tmp_dict) - result = await init_temporary_document_parse_task(temporary_document_list) - return BaseResponse(data=result) - except Exception as e: - return 
BaseResponse(retcode=status.HTTP_500_INTERNAL_SERVER_ERROR, retmsg=str(e), data=None) - - -@router.post('/temporary/status', response_model=BaseResponse[List[TemporaryDocumentDTO]]) -async def get_temporary_doc_parse_status(req: GetTemporaryDocumentStatusRequest): - try: - result = await get_temporary_document_parse_status(req.ids) - return BaseResponse(data=result) - except Exception as e: - return BaseResponse(retcode=status.HTTP_500_INTERNAL_SERVER_ERROR, retmsg=str(e), data=None) - - -@router.post('/temporary/delete', response_model=BaseResponse[List[uuid.UUID]]) -async def delete_temporary_doc(req: DeleteTemporaryDocumentRequest): - try: - result = await delete_temporary_document(req.ids) - return BaseResponse(data=result) - except Exception as e: - return BaseResponse(retcode=status.HTTP_500_INTERNAL_SERVER_ERROR, retmsg=str(e), data=None) +@router.post('/list', response_model=ListDocumentResponse, dependencies=[Depends(verify_user)]) +async def list_doc( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListDocumentRequest, Body()] +): + if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): + raise Exception("用户没有权限访问该知识库的文档") + list_document_msg = await DocumentService.list_doc(req) + return ListDocumentResponse(result=list_document_msg) + + +@router.get('/download', dependencies=[Depends(verify_user)]) +async def download_doc_by_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + doc_id: Annotated[UUID, Query(alias="docId")]): + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, action)): + raise Exception("用户没有权限访问该文档") + document_link_url = await DocumentService.generate_doc_download_url(doc_id) + document_name, extension = await DocumentService.get_doc_name_and_extension(doc_id) + async with AsyncClient() as async_client: + response = await
async_client.get(document_link_url) + if response.status_code == 200: + content_disposition = f"attachment; filename={urllib.parse.quote(document_name.encode('utf-8'))}" + + async def stream_generator(): + async for chunk in response.aiter_bytes(chunk_size=8192): + yield chunk + + return StreamingResponse(stream_generator(), headers={ + "Content-Disposition": content_disposition, + "Content-Length": str(len(response.content)) + }, media_type="application/" + extension) + else: + raise Exception(f"下载文档失败,状态码: {response.status_code}") + + +@router.get('/report', response_model=GetDocumentReportResponse, dependencies=[Depends(verify_user)]) +async def get_doc_report( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + doc_id: Annotated[UUID, Query(alias="docId")]): + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, action)): + raise Exception("用户没有权限访问该文档") + task_report = await DocumentService.get_doc_report(doc_id) + return GetDocumentReportResponse(result=task_report) + + +@router.post('', response_model=UploadDocumentResponse, dependencies=[Depends(verify_user)]) +async def upload_docs( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_id: Annotated[UUID, Query(alias="kbId")], + docs: list[UploadFile] = File(...)): + if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action)): + raise Exception("用户没有权限上传文档到该知识库") + doc_ids = await DocumentService.upload_docs(kb_id, docs) + return UploadDocumentResponse(result=doc_ids) + + +@router.post('/parse', response_model=ParseDocumentResponse, dependencies=[Depends(verify_user)]) +async def parse_docuement_by_doc_ids( + user_sub: Annotated[str, Depends(get_user_sub)], + doc_ids: Annotated[list[UUID], Query(alias="docIds")], + parse: Annotated[bool, Query()]): + for doc_id in doc_ids: + if not (await
DocumentService.validate_user_action_to_document(user_sub, doc_id, "parse")): + raise Exception("用户没有权限解析该文档") + if parse: + doc_ids = await DocumentService.parse_docs(doc_ids, parse) + return ParseDocumentResponse(result=doc_ids) + + +@router.put('', response_model=UpdateDocumentResponse, dependencies=[Depends(verify_user)]) +async def update_doc_by_doc_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + doc_id: Annotated[UUID, Query(alias="docId")], + req: Annotated[UpdateDocumentRequest, Body()]): + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, action)): + raise Exception("用户没有权限更新该文档") + doc_id = await DocumentService.update_doc(doc_id, req) + return UpdateDocumentResponse(result=doc_id) + + +@router.delete('', response_model=DeleteDocumentResponse, dependencies=[Depends(verify_user)]) +async def delete_docs_by_ids( + user_sub: Annotated[str, Depends(get_user_sub)], + doc_ids: Annotated[list[UUID], Body(alias="docIds")]): + for doc_id in doc_ids: + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, "delete")): + raise Exception("用户没有权限删除该文档") + await DocumentService.delete_docs_by_ids(doc_ids) + return DeleteDocumentResponse(result=doc_ids) diff --git a/data_chain/apps/router/health_check.py b/data_chain/apps/router/health_check.py index e020fcf..f126594 100644 --- a/data_chain/apps/router/health_check.py +++ b/data_chain/apps/router/health_check.py @@ -4,7 +4,7 @@ from fastapi import APIRouter, Response, status router = APIRouter( prefix="/health_check", - tags=["health_check"] + tags=["Health check"] ) diff --git a/data_chain/apps/router/knowledge_base.py b/data_chain/apps/router/knowledge_base.py index b04f161..10ceebc 100644 --- a/data_chain/apps/router/knowledge_base.py +++ b/data_chain/apps/router/knowledge_base.py @@ -1,325 +1,157 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+ +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile +from fastapi.responses import StreamingResponse, HTMLResponse, Response +from typing import Annotated import urllib -import uuid -import time -from typing import List -import uuid +from uuid import UUID from httpx import AsyncClient -from fastapi import APIRouter, File, UploadFile, status, HTTPException -from fastapi import Depends -from fastapi.responses import StreamingResponse, HTMLResponse, Response - -from data_chain.logger.logger import logger as logging -from data_chain.apps.service.user_service import verify_csrf_token, get_user_id, verify_user -from data_chain.exceptions.err_code import ErrorCode -from data_chain.exceptions.exception import KnowledgeBaseException -from data_chain.models.api import Page, BaseResponse, ExportKnowledgeBaseRequest, \ - CreateKnowledgeBaseRequest, DeleteKnowledgeBaseRequest, ListKnowledgeBaseRequest, StopTaskRequest, \ - UpdateKnowledgeBaseRequest, RmoveTaskRequest, ListTaskRequest, QueryRequest -from data_chain.apps.service.knwoledge_base_service import _validate_knowledge_base_belong_to_user, \ - create_knowledge_base, list_knowledge_base, rm_knowledge_base, generate_knowledge_base_download_link, submit_import_knowledge_base_task, \ - update_knowledge_base, list_knowledge_base_task, stop_knowledge_base_task, submit_export_knowledge_base_task, rm_knowledge_base_task, rm_all_knowledge_base_task -from data_chain.apps.service.model_service import get_model_by_kb_id -from data_chain.models.constant import KnowledgeLanguageEnum, TaskConstant -from data_chain.models.service import KnowledgeBaseDTO -from data_chain.apps.service.task_service import _validate_task_belong_to_user -from data_chain.apps.service.llm_service import question_rewrite, get_llm_answer, filter_stopwords -from data_chain.config.config import config -from data_chain.apps.service.chunk_service import get_similar_chunks, split_chunk +from data_chain.entities.request_data import ( + 
ListKnowledgeBaseRequest, + CreateKnowledgeBaseRequest, + UpdateKnowledgeBaseRequest, +) + +from data_chain.entities.response_data import ( + ListAllKnowledgeBaseMsg, + ListAllKnowledgeBaseResponse, + ListKnowledgeBaseResponse, + ListDocumentTypesResponse, + CreateKnowledgeBaseResponse, + ImportKnowledgeBaseResponse, + ExportKnowledgeBaseResponse, + UpdateKnowledgeBaseResponse, + DeleteKnowledgeBaseResponse, +) +from data_chain.apps.service.team_service import TeamService +from data_chain.apps.service.knwoledge_base_service import KnowledgeBaseService +from data_chain.apps.service.task_service import TaskService +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info router = APIRouter(prefix='/kb', tags=['Knowledge Base']) -@router.post('/create', response_model=BaseResponse[KnowledgeBaseDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def create(req: CreateKnowledgeBaseRequest, user_id=Depends(get_user_id)): - try: - tmp_dict = dict(req) - tmp_dict['user_id'] = user_id - knowledge_base = await create_knowledge_base(tmp_dict) - return BaseResponse(data=knowledge_base) - except Exception as e: - logging.error(f"Create knowledge base failed due to: {e}") - return BaseResponse(retcode=ErrorCode.CREATE_KNOWLEDGE_BASE_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/update', response_model=BaseResponse[KnowledgeBaseDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def update(req: UpdateKnowledgeBaseRequest, user_id=Depends(get_user_id)): - try: - update_dict = dict(req) - update_dict['user_id'] = user_id - knowledge_base = await update_knowledge_base(update_dict) - return BaseResponse(data=knowledge_base) - except Exception as e: - logging.error(f"Update knowledge base failed due to: {e}") - return BaseResponse(retcode=ErrorCode.UPDATE_KNOWLEDGE_BASE_ERROR, retmsg=str(e.args[0]), data=None) - - 
-@router.post('/list', response_model=BaseResponse[Page[KnowledgeBaseDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def list(req: ListKnowledgeBaseRequest, user_id=Depends(get_user_id)): - try: - params = dict(req) - params['user_id'] = user_id - page_number = req.page_number - page_size = req.page_size - knowledge_base_list_tuple = await list_knowledge_base(params, page_number, page_size) - knowledge_base_page = Page(page_number=req.page_number, page_size=req.page_size, - total=knowledge_base_list_tuple[1], - data_list=knowledge_base_list_tuple[0]) - return BaseResponse(data=knowledge_base_page) - except Exception as e: - logging.error(f"List knowledge base failed due to: {e}") - return BaseResponse(retcode=ErrorCode.LIST_KNOWLEDGE_BASE_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/rm', response_model=BaseResponse[bool], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def rm(req: DeleteKnowledgeBaseRequest, user_id=Depends(get_user_id)): - try: - await _validate_knowledge_base_belong_to_user(user_id, req.id) - res = await rm_knowledge_base(req.id) - return BaseResponse(data=res) - except Exception as e: - logging.error(f"Rmove knowledge base failed due to: {e}") - return BaseResponse(retcode=ErrorCode.DELETE_KNOWLEDGE_BASE_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.post('/import', response_model=BaseResponse[List[str]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def import_(files: List[UploadFile] = File(...), user_id=Depends(get_user_id)): - try: - res = await submit_import_knowledge_base_task(user_id, files) - return BaseResponse(data=res) - except Exception as e: - logging.error(f"Import knowledge base failed due to: {e}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - - -@router.post('/export', response_model=BaseResponse[str], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) 
-async def export(req: ExportKnowledgeBaseRequest, user_id=Depends(get_user_id)): - try: - await _validate_knowledge_base_belong_to_user(user_id, req.id) - res = await submit_export_knowledge_base_task(user_id, req.id) - return BaseResponse(data=res) - except Exception as e: - logging.error(f"Export knowledge base failed due to: {e}") - return BaseResponse(retcode=ErrorCode.EXPORT_KNOWLEDGE_BASE_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.get('/download', dependencies=[Depends(verify_user), Depends(verify_csrf_token)]) -async def download(task_id: uuid.UUID, user_id=Depends(get_user_id)): - try: - await _validate_task_belong_to_user(user_id, task_id) - zip_download_url = await generate_knowledge_base_download_link(task_id) - if not zip_download_url: - return BaseResponse( - retcode=ErrorCode.EXPORT_KNOWLEDGE_BASE_ERROR, - retmsg="zip download url is empty", - data=None +@router.get('', response_model=ListAllKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def list_kb_by_user_sub( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_name: str | None = Query(default=None, alias="kbName") +): + list_all_kb_msg = await KnowledgeBaseService.list_kb_by_user_sub(user_sub, kb_name=kb_name) + return ListAllKnowledgeBaseResponse(result=list_all_kb_msg) + + +@router.post('/team', response_model=ListKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def list_kb_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListKnowledgeBaseRequest, Body()] +): + if not await TeamService.validate_user_action_in_team(user_sub, req.team_id, action): + raise Exception("用户没有权限访问该团队的知识库") + list_kb_msg = await KnowledgeBaseService.list_kb_by_team_id(req) + return ListKnowledgeBaseResponse(result=list_kb_msg) + + +@router.get('/doc_type', response_model=ListDocumentTypesResponse, dependencies=[Depends(verify_user)]) 
+async def list_doc_types_by_kb_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_id: Annotated[UUID, Query(alias="kbId")], +): + if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): + raise Exception("用户没有权限访问该知识库的文档类型") + list_doc_types_msg = await KnowledgeBaseService.list_doc_types_by_kb_id(kb_id) + return ListDocumentTypesResponse(result=list_doc_types_msg) + + +@router.get('/download', dependencies=[Depends(verify_user)]) +async def download_kb_by_task_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + task_id: Annotated[UUID, Query(alias="taskId")]): + if not await TaskService.validate_user_action_to_task(user_sub, task_id, action): + raise Exception("用户没有权限访问该知识库的任务") + zip_download_url = await KnowledgeBaseService.generate_knowledge_base_download_link(task_id) + if not zip_download_url: + raise Exception("知识库下载连接生成失败") + async with AsyncClient() as async_client: + response = await async_client.get(zip_download_url) + if response.status_code == 200: + zip_name = f"{task_id}.zip" + content_disposition = f"attachment; filename={urllib.parse.quote(zip_name.encode('utf-8'))}" + content_length = len(response.content) + + # Async generator that yields the already-downloaded body in chunks + async def stream_generator(): + async for chunk in response.aiter_bytes(chunk_size=8192): + yield chunk + + return StreamingResponse( + stream_generator(), + headers={ + "Content-Disposition": content_disposition, + "Content-Length": str(content_length) + }, + media_type="application/zip" ) - async with AsyncClient() as async_client: - response = await async_client.get(zip_download_url) - if response.status_code == 200: - # 保持 response 对象打开直到所有数据都被发送 - zip_name = f"{task_id}.zip" - content_disposition = f"attachment; filename={urllib.parse.quote(zip_name.encode('utf-8'))}" - content_length =
response.headers.get('content-length') - - # 定义一个协程函数来生成数据流 - async def stream_generator(): - async for chunk in response.aiter_bytes(chunk_size=8192): - yield chunk - - return StreamingResponse( - stream_generator(), - headers={ - "Content-Disposition": content_disposition, - "Content-Length": str(content_length) if content_length else None - }, - media_type="application/zip" - ) - else: - return BaseResponse( - retcode=ErrorCode.EXPORT_KNOWLEDGE_BASE_ERROR, - retmsg="Failed to retrieve the file.", - data=None - ) - except Exception as e: - logging.error(f"Download knowledge base zip failed due to: {e}") - return BaseResponse( - retcode=ErrorCode.EXPORT_KNOWLEDGE_BASE_ERROR, - retmsg=str(e.args[0]), - data=None - ) - - -@router.get('/language', response_model=BaseResponse[List[str]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -def language(): - return BaseResponse(data=KnowledgeLanguageEnum.get_all_values()) - - -@router.post('/task/list', response_model=BaseResponse[Page[KnowledgeBaseDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def list_kb_task(req: ListTaskRequest, user_id=Depends(get_user_id)): - try: - params = dict(req) - params['user_id'] = user_id - if 'types' not in params.keys(): - params['types'] = [TaskConstant.EXPORT_KNOWLEDGE_BASE, TaskConstant.IMPORT_KNOWLEDGE_BASE] - total, knowledge_dto_list = await list_knowledge_base_task(req.page_number, req.page_size, params) - knowledge_base_page = Page(page_number=req.page_number, page_size=req.page_size, - total=total, - data_list=knowledge_dto_list) - return BaseResponse(data=knowledge_base_page) - except Exception as e: - logging.error(f"List knowledge base task error due to: {e}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - - -@router.post('/task/rm', response_model=BaseResponse[bool], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def rm_kb_task(req: RmoveTaskRequest, 
user_id=Depends(get_user_id)): - try: - if req.task_id is not None: - await _validate_task_belong_to_user(user_id, req.task_id) - res = await rm_knowledge_base_task(req.task_id) - else: - if req.types is None: - types = [TaskConstant.EXPORT_KNOWLEDGE_BASE, TaskConstant.IMPORT_KNOWLEDGE_BASE] - else: - types = req.types - res = await rm_all_knowledge_base_task(user_id, types) - return BaseResponse(data=res) - except Exception as e: - logging.error(f"Remove knowledge base task failed due to: {e}") - return BaseResponse(retcode=ErrorCode.STOP_KNOWLEDGE_BASE_TASK_ERROR, retmsg=e.args[0], data=None) - - -@router.post('/get_stream_answer', response_class=HTMLResponse) -async def get_stream_answer(req: QueryRequest, response: Response): - model_dto = None - if req.kb_sn is not None: - model_dto = await get_model_by_kb_id(req.kb_sn) - if model_dto is None: - if len(config['MODELS']) > 0: - tokens_upper = config['MODELS'][0]['MAX_TOKENS'] - else: - logging.error("Can not find model config locally") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Can not find model config locally") - else: - tokens_upper = model_dto.max_tokens - try: - question = await question_rewrite(req.history, req.question, model_dto) - max_tokens = tokens_upper//3*2 - bac_info = '' - document_chunk_list = await get_similar_chunks(content=question, kb_id=req.kb_sn, temporary_document_ids=req.document_ids, max_tokens=2*tokens_upper, topk=req.top_k) - for i in range(len(document_chunk_list)): - document_name = document_chunk_list[i]['document_name'] - chunk_list = document_chunk_list[i]['chunk_list'] - bac_info += '文档名称:'+document_name+':\n\n' - for j in range(len(chunk_list)): - bac_info += '段落'+str(j)+':\n\n' - bac_info += chunk_list[j]+'\n\n' - bac_info = split_chunk(bac_info) - if len(bac_info) > max_tokens: - bac_info = '' - for i in range(len(document_chunk_list)): - document_name = document_chunk_list[i]['document_name'] - chunk_list = 
document_chunk_list[i]['chunk_list'] - bac_info += '文档名称:'+document_name+':\n\n' - for j in range(len(chunk_list)): - bac_info += '段落'+str(j)+':\n\n' - bac_info += ''.join(filter_stopwords(chunk_list[j]))+'\n\n' - bac_info = split_chunk(bac_info) - bac_info = bac_info[:max_tokens] - bac_info = ''.join(bac_info) - except Exception as e: - bac_info = '' - logging.error(f"get bac info failed due to: {e}") - try: - response.headers["Content-Type"] = "text/event-stream" - res = await get_llm_answer(req.history, bac_info, req.question, is_stream=True, model_dto=model_dto) - return StreamingResponse( - res, - status_code=status.HTTP_200_OK, - headers=response.headers - ) - except Exception as e: - logging.error(f"Get stream answer failed due to: {e}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - - -@router.post('/get_answer', response_model=BaseResponse[dict]) -async def get_answer(req: QueryRequest): - model_dto = None - if req.kb_sn is not None: - model_dto = await get_model_by_kb_id(req.kb_sn) - if model_dto is None: - if len(config['MODELS']) > 0: - tokens_upper = config['MODELS'][0]['MAX_TOKENS'] else: - logging.error("Can not find model config locally") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Can not find model config locally") - else: - tokens_upper = model_dto.max_tokens - try: - question = await question_rewrite(req.history, req.question, model_dto) - max_tokens = tokens_upper//3*2 - bac_info = '' - t_cost_dict, document_chunk_list = await get_similar_chunks(content=question, kb_id=req.kb_sn, temporary_document_ids=req.document_ids, max_tokens=2*tokens_upper, topk=req.top_k, return_t_cost=True) - for i in range(len(document_chunk_list)): - document_name = document_chunk_list[i]['document_name'] - chunk_list = document_chunk_list[i]['chunk_list'] - bac_info += '文档名称:'+document_name+':\n\n' - for j in range(len(chunk_list)): - bac_info += '段落'+str(j)+':\n\n' - bac_info += 
chunk_list[j]+'\n\n' - bac_info = split_chunk(bac_info) - if len(bac_info) > max_tokens: - bac_info = '' - for i in range(len(document_chunk_list)): - document_name = document_chunk_list[i]['document_name'] - chunk_list = document_chunk_list[i]['chunk_list'] - bac_info += '文档名称:'+document_name+':\n\n' - for j in range(len(chunk_list)): - bac_info += '段落'+str(j)+':\n\n' - bac_info += ''.join(filter_stopwords(chunk_list[j]))+'\n\n' - bac_info = split_chunk(bac_info) - bac_info = bac_info[:max_tokens] - bac_info = ''.join(bac_info) - except Exception as e: - bac_info = '' - logging.error(f"get bac info failed due to: {e}") - try: - st = time.time() - answer = await get_llm_answer(req.history, bac_info, req.question, is_stream=False, model_dto=model_dto) - t_cost_dict['llm_answer'] = time.time()-st - tmp_dict = { - 'answer': answer, - 'time_cost': t_cost_dict - } - if req.fetch_source: - tmp_dict['source'] = [] - for i in range(len(document_chunk_list)): - document_name = document_chunk_list[i]['document_name'] - chunk_list = document_chunk_list[i]['chunk_list'] - for j in range(len(chunk_list)): - tmp_dict['source'].append({'document_name': document_name, 'chunk': chunk_list[j]}) - return BaseResponse(data=tmp_dict) - except Exception as e: - logging.error(f"Get stream answer failed due to: {e}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise Exception(f"下载知识库 zip 失败,状态码: {response.status_code}") + + +@router.post('', response_model=CreateKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def create_kb(user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + req: Annotated[CreateKnowledgeBaseRequest, Body()]): + if not await TeamService.validate_user_action_in_team(user_sub, team_id, action): + raise Exception("用户没有权限在该团队创建知识库") + kb_id = await KnowledgeBaseService.create_kb(user_sub, team_id, req) + return 
CreateKnowledgeBaseResponse(result=kb_id) + + +@router.post('/import', response_model=ImportKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def import_kbs(user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + kb_packages: list[UploadFile] = File(...)): + if not await TeamService.validate_user_action_in_team(user_sub, team_id, action): + raise Exception("用户没有权限在该团队导入知识库") + kb_import_task_ids = await KnowledgeBaseService.import_kb(user_sub, team_id, kb_packages) + return ImportKnowledgeBaseResponse(result=kb_import_task_ids) + + +@router.post('/export', response_model=ExportKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def export_kb_by_kb_ids( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_ids: Annotated[list[UUID], Query(alias="kbIds")]): + for kb_id in kb_ids: + if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): + raise Exception("用户没有权限在该知识库导出知识库") + kb_export_task_ids = await KnowledgeBaseService.export_kb_by_kb_ids(kb_ids) + return ExportKnowledgeBaseResponse(result=kb_export_task_ids) + + +@router.put('', response_model=UpdateKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def update_kb_by_kb_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_id: Annotated[UUID, Query(alias="kbId")], + req: Annotated[UpdateKnowledgeBaseRequest, Body()]): + if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): + raise Exception("用户没有权限在该知识库更新知识库") + kb_id = await KnowledgeBaseService.update_kb_by_kb_id(kb_id, req) + return UpdateKnowledgeBaseResponse(result=kb_id) + + +@router.delete('', response_model=DeleteKnowledgeBaseResponse, dependencies=[Depends(verify_user)]) +async def delete_kb_by_kb_ids( + user_sub:
Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + kb_ids: Annotated[list[UUID], Body(alias="kbIds")]): + for kb_id in kb_ids: + if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): + raise Exception("用户没有权限在该知识库删除知识库") + kb_ids_deleted = await KnowledgeBaseService.delete_kb_by_kb_ids(kb_ids) + return DeleteKnowledgeBaseResponse(result=kb_ids_deleted) diff --git a/data_chain/apps/router/model.py b/data_chain/apps/router/model.py deleted file mode 100644 index acec0ab..0000000 --- a/data_chain/apps/router/model.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from fastapi import Depends -from fastapi import APIRouter -from typing import List -from data_chain.models.service import ModelDTO -from data_chain.apps.service.user_service import verify_csrf_token, get_user_id, verify_user -from data_chain.exceptions.err_code import ErrorCode -from data_chain.exceptions.exception import DocumentException -from data_chain.models.api import BaseResponse -from data_chain.models.api import UpdateModelRequest -from data_chain.apps.service.model_service import get_model_by_user_id, list_offline_model, update_model - - -router = APIRouter(prefix='/model', tags=['Model']) - - -@router.post('/update', response_model=BaseResponse[ModelDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def update(req: UpdateModelRequest, user_id=Depends(get_user_id)): - try: - update_dict = dict(req) - update_dict['user_id']=user_id - model_dto = await update_model(user_id, update_dict) - model_dto.openai_api_key=None - return BaseResponse(data=model_dto) - except Exception as e: - return BaseResponse(retcode=ErrorCode.UPDATE_MODEL_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.get('/get', response_model=BaseResponse[ModelDTO], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def 
get(user_id=Depends(get_user_id)): - try: - model_dto = await get_model_by_user_id(user_id) - model_dto.openai_api_key = None - return BaseResponse(data=model_dto) - except Exception as e: - return BaseResponse(retcode=ErrorCode.UPDATE_MODEL_ERROR, retmsg=str(e.args[0]), data=None) - - -@router.get('/list', response_model=BaseResponse[List[ModelDTO]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def list(): - try: - model_dto_list = await list_offline_model() - return BaseResponse(data=model_dto_list) - except Exception as e: - return BaseResponse(retcode=ErrorCode.UPDATE_MODEL_ERROR, retmsg=str(e.args[0]), data=None) diff --git a/data_chain/apps/router/other.py b/data_chain/apps/router/other.py index 70eb786..3085e9a 100644 --- a/data_chain/apps/router/other.py +++ b/data_chain/apps/router/other.py @@ -1,26 +1,74 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from typing import List +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+import base64 +from fastapi import APIRouter, Depends, Query, Body +import json +import hashlib +from typing import Annotated +from uuid import UUID +from data_chain.config.config import config +from data_chain.entities.enum import Embedding, Tokenizer, ParseMethod, SearchMethod +from data_chain.entities.response_data import ( + LLM, + ListLLMMsg, + ListLLMResponse, + ListEmbeddingResponse, + ListTokenizerResponse, + ListParseMethodResponse, + ListSearchMethodResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info +router = APIRouter(prefix='/other', tags=['Other']) -from data_chain.apps.service.user_service import verify_csrf_token, get_user_id, verify_user -from data_chain.models.api import BaseResponse -from data_chain.models.constant import EmbeddingModelEnum, ParseMethodEnum -from fastapi import APIRouter -from fastapi import Depends +@router.get('/llm', response_model=ListLLMResponse, dependencies=[Depends(verify_user)]) +async def list_llms_by_user_sub( + user_sub: Annotated[str, Depends(get_user_sub)], +): + with open('./data_chain/llm/icon/ollama.svg', 'r', encoding='utf-8') as file: + svg_content = file.read() + svg_bytes = svg_content.encode('utf-8') + base64_bytes = base64.b64encode(svg_bytes) + base64_string = base64_bytes.decode('utf-8') + config_params = { + 'MODEL_NAME': config['MODEL_NAME'], + 'OPENAI_API_BASE': config['OPENAI_API_BASE'], + 'OPENAI_API_KEY': config['OPENAI_API_KEY'], + 'REQUEST_TIMEOUT': config['REQUEST_TIMEOUT'], + 'MAX_TOKENS': config['MAX_TOKENS'], + 'TEMPERATURE': config['TEMPERATURE'] + } + config_json = json.dumps(config_params, sort_keys=True, ensure_ascii=False).encode('utf-8') + hash_object = hashlib.sha256(config_json) + hash_hex = hash_object.hexdigest() + llm = LLM( + llmId=hash_hex, + llmName=config['MODEL_NAME'], + llmIcon=base64_string, + ) + list_llm_msg = ListLLMMsg(llms=[llm]) + return 
ListLLMResponse(result=list_llm_msg) -router = APIRouter(prefix='/other', tags=['Other Api']) +@router.get('/embedding', response_model=ListEmbeddingResponse, dependencies=[Depends(verify_user)]) +async def list_embeddings(): + embeddings = [embedding.value for embedding in Embedding] + return ListEmbeddingResponse(result=embeddings) -@router.get('/embedding_model', response_model=BaseResponse[List[str]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def embedding_model(): - return BaseResponse(data=EmbeddingModelEnum.get_all_values()) +@router.get('/tokenizer', response_model=ListTokenizerResponse, dependencies=[Depends(verify_user)]) +async def list_tokenizers(): + tokenizers = [tokenizer.value for tokenizer in Tokenizer] + return ListTokenizerResponse(result=tokenizers) -@router.get('/parse_method', response_model=BaseResponse[List[str]], - dependencies=[Depends(verify_user), - Depends(verify_csrf_token)]) -async def parse_method(): - return BaseResponse(data=ParseMethodEnum.get_all_values()) +@router.get('/parse_method', response_model=ListParseMethodResponse, dependencies=[Depends(verify_user)]) +async def list_parse_method(): + parse_methods = [parse_method.value for parse_method in ParseMethod] + return ListParseMethodResponse(result=parse_methods) + + +@router.get('/search_method', response_model=ListSearchMethodResponse, dependencies=[Depends(verify_user)]) +async def list_search_method(): + search_methods = [search_method.value for search_method in SearchMethod] + return ListSearchMethodResponse(result=search_methods) diff --git a/data_chain/apps/router/role.py b/data_chain/apps/router/role.py new file mode 100644 index 0000000..5271156 --- /dev/null +++ b/data_chain/apps/router/role.py @@ -0,0 +1,58 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+ +from fastapi import APIRouter, Depends, Query, Body +from typing import Annotated +from uuid import UUID +from data_chain.entities.request_data import ( + ListRoleRequest, + CreateRoleRequest, + UpdateRoleRequest +) + +from data_chain.entities.response_data import ( + ListActionResponse, + ListRoleResponse, + CreateRoleResponse, + UpdateRoleResponse, + DeleteRoleResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info +router = APIRouter(prefix='/role', tags=['Role']) + + +@router.get('/action', response_model=ListActionResponse, dependencies=[Depends(verify_user)]) +async def list_actions( + user_sub: Annotated[str, Depends(get_user_sub)], +): + return ListActionResponse() + + +@router.post('/list', response_model=ListRoleResponse, dependencies=[Depends(verify_user)]) +async def list_role_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + req: Annotated[ListRoleRequest, Body()], +): + return ListRoleResponse() + + +@router.post('', response_model=CreateRoleResponse, dependencies=[Depends(verify_user)]) +async def create_role(user_sub: Annotated[str, Depends(get_user_sub)], + team_id: Annotated[UUID, Query(alias="TeamId")], + req: Annotated[CreateRoleRequest, Body()]): + return CreateRoleResponse() + + +@router.put('', response_model=UpdateRoleResponse, dependencies=[Depends(verify_user)]) +async def update_role_by_role_id( + user_sub: Annotated[str, Depends(get_user_sub)], + role_id: Annotated[UUID, Query(alias="roleId")], + req: Annotated[UpdateRoleRequest, Body()]): + return UpdateRoleResponse() + + +@router.delete('', response_model=DeleteRoleResponse, dependencies=[Depends(verify_user)]) +async def delete_role_by_role_ids( + user_sub: Annotated[str, Depends(get_user_sub)], + role_ids: Annotated[list[UUID], Body(alias="roleId")]): + return DeleteRoleResponse() diff --git a/data_chain/apps/router/task.py b/data_chain/apps/router/task.py new file 
mode 100644 index 0000000..4ac458b --- /dev/null +++ b/data_chain/apps/router/task.py @@ -0,0 +1,57 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from data_chain.apps.service.session_service import get_user_sub, verify_user +from fastapi import APIRouter, Depends, Query, Body +from typing import Annotated +from uuid import UUID +from data_chain.entities.request_data import ( + ListTaskRequest +) + +from data_chain.entities.response_data import ( + ListTaskResponse, + GetTaskReportResponse, + DeleteTaskByIdResponse, + DeleteTaskByTypeResponse +) +from data_chain.entities.enum import TaskType +from data_chain.apps.service.router_service import get_route_info +from data_chain.apps.service.team_service import TeamService +from data_chain.apps.service.task_service import TaskService +router = APIRouter(prefix='/task', tags=['Task']) + + +@router.post('', response_model=ListTaskResponse, dependencies=[Depends(verify_user)]) +async def list_task( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListTaskRequest, Body()] +): + if not (await TeamService.validate_user_action_in_team(user_sub, req.team_id, action)): + raise Exception("用户没有权限访问该团队的任务") + list_task_msg = await TaskService.list_task(user_sub, req) + return ListTaskResponse(result=list_task_msg) + + +@router.delete('/one', response_model=DeleteTaskByIdResponse, dependencies=[Depends(verify_user)]) +async def delete_task_by_task_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + task_id: Annotated[UUID, Query(alias="taskId")], +): + if not (await TaskService.validate_user_action_to_task(user_sub, task_id, action)): + raise Exception("用户没有权限访问该团队的任务") + task_ids = await TaskService.delete_task_by_task_id(task_id) + return DeleteTaskByIdResponse() + + +@router.delete('/all', response_model=DeleteTaskByTypeResponse, dependencies=[Depends(verify_user)]) 
+async def delete_task_by_task_type( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + task_type: Annotated[TaskType, Query(alias="taskType")], +): + if not (await TeamService.validate_user_action_in_team(user_sub, team_id, action)): + raise Exception("用户没有权限访问该团队的任务") + task_ids = await TaskService.delete_task_by_type(user_sub, team_id, task_type, task_ids) + return DeleteTaskByTypeResponse() diff --git a/data_chain/apps/router/team.py b/data_chain/apps/router/team.py new file mode 100644 index 0000000..9a171d7 --- /dev/null +++ b/data_chain/apps/router/team.py @@ -0,0 +1,132 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + +from fastapi import APIRouter, Depends, Query, Body +from typing import Annotated +from uuid import UUID +from data_chain.entities.request_data import ( + ListTeamRequest, + ListTeamMsgRequest, + ListTeamUserRequest, + CreateTeamRequest, + UpdateTeamRequest, +) + +from data_chain.entities.response_data import ( + ListTeamMsg, + ListTeamResponse, + ListTeamMsgResponse, + ListTeamUserResponse, + CreateTeamResponse, + UpdateTeamResponse, + DeleteTeamResponse, + UpdateTeamUserRoleResponse, + UpdateTeamAuthorResponse, + DeleteTeamUserResponse, + JoinTeamResponse, + InviteTeamUserResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.team_service import TeamService +from data_chain.apps.service.router_service import get_route_info +router = APIRouter(prefix='/team', tags=['Team']) + + +@router.post('/list', response_model=ListTeamResponse, dependencies=[Depends(verify_user)]) +async def list_teams( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListTeamRequest, Body()] +): + list_team_msg = await TeamService.list_teams(user_sub, req) + return 
ListTeamResponse(message='团队列表获取成功', result=list_team_msg) + + +@router.post('/usr', response_model=ListTeamUserResponse, dependencies=[Depends(verify_user)]) +async def list_team_user_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListTeamUserRequest, Body()]): + return ListTeamUserResponse() + + +@router.post('/msg', response_model=ListTeamMsgResponse, dependencies=[Depends(verify_user)]) +async def list_team_msg_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[ListTeamMsgRequest, Body()]): + return ListTeamMsgResponse() + + +@router.post('', response_model=CreateTeamResponse, dependencies=[Depends(verify_user)]) +async def create_team(user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[CreateTeamRequest, Body()]): + team_id = await TeamService.create_team(user_sub, req) + return CreateTeamResponse(message='团队创建成功', result=team_id) + + +@router.post('/invitation', response_model=InviteTeamUserResponse, dependencies=[Depends(verify_user)]) +async def invite_team_user_by_user_sub( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + user_sub_invite: Annotated[str, Query(alias="userSubInvite")]): + return InviteTeamUserResponse() + + +@router.post('/application', response_model=JoinTeamResponse, dependencies=[Depends(verify_user)]) +async def join_team( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")]): + return JoinTeamResponse() + + +@router.put('', response_model=UpdateTeamResponse, dependencies=[Depends(verify_user)]) +async def update_team_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, 
Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + req: Annotated[UpdateTeamRequest, Body()]): + if not (await TeamService.validate_user_action_in_team(user_sub, team_id, action)): + raise Exception('用户没有权限修改该团队') + team_id = await TeamService.update_team_by_team_id(user_sub, team_id, req) + return UpdateTeamResponse(message='团队更新成功', result=team_id) + + +@router.put('/usr', response_model=UpdateTeamUserRoleResponse, dependencies=[Depends(verify_user)]) +async def update_team_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")], + role_id: Annotated[UUID, Query(alias="roleId")]): + return UpdateTeamUserRoleResponse() + + +@router.put('/author', response_model=UpdateTeamAuthorResponse, dependencies=[Depends(verify_user)]) +async def update_team_author_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + recriver_sub: Annotated[str, Query(alias="recriverSub")], + team_id: Annotated[UUID, Query(alias="teamId")]): + return UpdateTeamAuthorResponse() + + +@router.delete('', response_model=DeleteTeamResponse, dependencies=[Depends(verify_user)]) +async def delete_team_by_team_id( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, Query(alias="teamId")]): + if not (await TeamService.validate_user_action_in_team(user_sub, team_id, action)): + raise Exception('用户没有权限删除该团队') + team_id = await TeamService.soft_delete_team_by_team_id(team_id) + return DeleteTeamResponse(message='团队删除成功', result=team_id) + + +@router.delete('/usr', response_model=DeleteTeamUserResponse, dependencies=[Depends(verify_user)]) +async def delete_team_user_by_team_id_and_user_subs( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + team_id: Annotated[UUID, 
Query(alias="teamId")], + user_subs: Annotated[list[str], Body(alias="userSub")]): + return DeleteTeamUserResponse() diff --git a/data_chain/apps/router/user.py b/data_chain/apps/router/user.py index 74217d7..96cb602 100644 --- a/data_chain/apps/router/user.py +++ b/data_chain/apps/router/user.py @@ -1,16 +1,16 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from __future__ import annotations -import uuid -from data_chain.logger.logger import logger as logging - -from fastapi import APIRouter, Depends, Request, Response, status -from data_chain.config.config import config -from data_chain.apps.service.user_service import verify_csrf_token, verify_passwd, get_user_id, verify_user -from data_chain.apps.base.session.session import SessionManager -from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.user_manager import UserManager -from data_chain.models.api import BaseResponse, AddUserRequest, UpdateUserRequest +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from fastapi import APIRouter, Depends, Query, Body +from typing import Annotated +from uuid import UUID +from data_chain.entities.request_data import ( + ListUserRequest +) +from data_chain.entities.response_data import ( + ListUserResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info router = APIRouter( prefix="/user", @@ -18,151 +18,9 @@ router = APIRouter( ) -@router.post("/add", response_model=BaseResponse) -async def add_user(request: AddUserRequest): - name = request.name - email = request.email - account = request.account - passwd = request.passwd - user_entity = await UserManager.get_user_info_by_account(account) - if user_entity is not None: - return BaseResponse( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - retmsg="Sign failed due to duplicate account", - data={} - ) - if email is not None: - user_entity = await UserManager.get_user_info_by_email(email) - if user_entity is not None: - return BaseResponse( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - retmsg="Sign failed due to duplicate email", - data={} - ) - user_entity = await UserManager.add_user(name,email,account, passwd) - if user_entity is None: - return BaseResponse( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - retmsg="Sign failed due to add user failed", - data={} - ) - return BaseResponse( - code=status.HTTP_200_OK, - retmsg="Sign successful", - data={} - ) - - -@router.post("/del", response_model=BaseResponse, dependencies=[Depends(verify_user), Depends(verify_csrf_token)]) -async def del_user(request: Request, response: Response, user_id=Depends(get_user_id)): - session_id = request.cookies['WD_ECSESSION'] - if not SessionManager.verify_user(session_id): - logging.info("User already logged out.") - return BaseResponse(code=200, retmsg="ok", data={}) - - SessionManager.delete_session(user_id) - response.delete_cookie("WD_ECSESSION") - response.delete_cookie("wd_csrf_tk") - await 
UserManager.del_user_by_user_id(user_id) - response_data = BaseResponse( - code=status.HTTP_200_OK, - retmsg="Cancel successful", - data={} - ) - return response_data - - -@router.get("/login", response_model=BaseResponse, dependencies=[Depends(verify_passwd)]) -async def login(request: Request, response: Response, account: str): - user_info = await UserManager.get_user_info_by_account(account) - if user_info is None: - return BaseResponse( - code=status.HTTP_401_UNAUTHORIZED, - retmsg="Login failed", - data={} - ) - - user_id = user_info.id - try: - SessionManager.delete_session(user_id) - current_session = SessionManager.create_session(user_id) - except Exception as e: - logging.error(f"Change session failed: {e}") - return BaseResponse( - code=status.HTTP_401_UNAUTHORIZED, - retmsg="Login failed", - data={} - ) - - new_csrf_token = SessionManager.create_csrf_token(current_session) - if config['COOKIE_MODE'] == 'DEBUG': - response.set_cookie( - "wd_csrf_tk", - new_csrf_token - ) - response.set_cookie( - "WD_ECSESSION", - current_session - ) - else: - response.set_cookie( - "wd_csrf_tk", - new_csrf_token, - max_age=config["SESSION_TTL"] * 60, - secure=config['SSL_ENABLE'], - domain=config["DOMAIN"], - samesite="strict" - ) - response.set_cookie( - "WD_ECSESSION", - current_session, - max_age=config["SESSION_TTL"] * 60, - secure=config['SSL_ENABLE'], - domain=config["DOMAIN"], - httponly=True, - samesite="strict" - ) - response_data = BaseResponse( - code=status.HTTP_200_OK, - retmsg="Login successful", - data={ - 'name': user_info.name, - 'language': user_info.language - } - ) - return response_data - - -@router.get("/logout", response_model=BaseResponse, dependencies=[Depends(verify_csrf_token)]) -async def logout(request: Request, response: Response, user_id=Depends(get_user_id)): - session_id = request.cookies['WD_ECSESSION'] - if not SessionManager.verify_user(session_id): - logging.info("User already logged out.") - return BaseResponse(code=200, retmsg="ok", 
data={}) - - SessionManager.delete_session(user_id) - response.delete_cookie("WD_ECSESSION") - response.delete_cookie("wd_csrf_tk") - return { - "code": status.HTTP_200_OK, - "rtmsg": "Logout success", - "data": {} - } - - -@router.post("/update", response_model=BaseResponse, dependencies=[Depends(verify_user), Depends(verify_csrf_token)]) -async def switch(req: UpdateUserRequest, user_id=Depends(get_user_id)): - user_info = UserManager.get_user_info_by_user_id(user_id) - if user_info is None: - return BaseResponse( - code=status.HTTP_401_UNAUTHORIZED, - retmsg="User is not exist", - data={} - ) - tmp_dict = dict(req) - UserManager.update_user_by_user_id(user_id, tmp_dict) - return { - "code": status.HTTP_200_OK, - "rtmsg": "Update success", - "data": {} - } +@router.post("/list", response_model=ListUserResponse, dependencies=[Depends(verify_user)]) +async def list_users( + user_sub: Annotated[str, Query(default=None, alias="userSub")], + req: Annotated[ListUserRequest, Body()] +): + return ListUserResponse() diff --git a/data_chain/apps/router/usr_message.py b/data_chain/apps/router/usr_message.py new file mode 100644 index 0000000..5018442 --- /dev/null +++ b/data_chain/apps/router/usr_message.py @@ -0,0 +1,37 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+ +from fastapi import APIRouter, Depends, Query, Body +from typing import Annotated +from uuid import UUID +from data_chain.entities.enum import UserMessageType, UserStatus +from data_chain.entities.response_data import ( + ListUserMessageResponse, + UpdateUserMessageResponse, + DeleteUserMessageResponse +) +from data_chain.apps.service.session_service import get_user_sub, verify_user +from data_chain.apps.service.router_service import get_route_info +router = APIRouter(prefix='/usr_msg', tags=['User Message']) + + +@router.post('/list', response_model=ListUserMessageResponse, dependencies=[Depends(verify_user)]) +async def list_user_msgs_by_user_sub( + user_sub: Annotated[str, Depends(get_user_sub)], + msg_type: Annotated[UserMessageType, Query(alias="msgType")], +): + return ListUserMessageResponse() + + +@router.put('', response_model=UpdateUserMessageResponse, dependencies=[Depends(verify_user)]) +async def update_user_msg_by_msg_id( + user_sub: Annotated[str, Depends(get_user_sub)], + msg_id: Annotated[UUID, Query(alias="msgId")], + msg_status: Annotated[UserStatus, Query(alias="msgStatus")]): + return UpdateUserMessageResponse() + + +@router.delete('', response_model=DeleteUserMessageResponse, dependencies=[Depends(verify_user)]) +async def delete_user_msg_by_msg_ids( + user_sub: Annotated[str, Depends(get_user_sub)], + msg_ids: Annotated[list[UUID], Body(alias="msgIds")]): + return DeleteUserMessageResponse() diff --git a/data_chain/apps/service/acc_testing_service.py b/data_chain/apps/service/acc_testing_service.py new file mode 100644 index 0000000..92e8697 --- /dev/null +++ b/data_chain/apps/service/acc_testing_service.py @@ -0,0 +1,203 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+import aiofiles +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile +import uuid +import traceback +import os +from data_chain.entities.request_data import ( + ListTestingRequest, + CreateTestingRequest, + UpdateTestingRequest +) +from data_chain.entities.response_data import ( + DatasetTesting, + ListTestingMsg, + TestingTestCase +) +from data_chain.apps.base.convertor import Convertor +from data_chain.apps.service.task_queue_service import TaskQueueService +from data_chain.manager.knowledge_manager import KnowledgeBaseManager +from data_chain.manager.task_manager import TaskManager +from data_chain.manager.task_report_manager import TaskReportManager +from data_chain.manager.dataset_manager import DatasetManager +from data_chain.manager.qa_manager import QAManager +from data_chain.manager.testing_manager import TestingManager +from data_chain.manager.testcase_manager import TestCaseManager +from data_chain.manager.team_manager import TeamManager +from data_chain.manager.role_manager import RoleManager +from data_chain.stores.minio.minio import MinIO +from data_chain.entities.enum import TestingStatus, TaskType, TaskStatus +from data_chain.entities.common import IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO +from data_chain.stores.database.database import DataSetEntity +from data_chain.logger.logger import logger as logging + + +class TestingService: + @staticmethod + async def validate_user_action_to_testing(user_sub: str, testing_id: uuid.UUID, action: str) -> bool: + """验证用户对测试的操作权限""" + try: + testing_entity = await TestingManager.get_testing_by_testing_id(testing_id) + if not testing_entity: + raise Exception("测试不存在") + action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action(user_sub, testing_entity.team_id, action) + if not action_entity: + return False + return True + except Exception as e: + err = "验证用户对测试的操作权限失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def 
list_testing_by_kb_id(req: ListTestingRequest) -> ListTestingMsg: + """根据知识库ID查询测试""" + try: + total, dataset_ids = await TestingManager.list_testing_unique_dataset_ids(req) + dataset_entities = await DatasetManager.list_datasets_by_dataset_ids(dataset_ids) + dataset_dict = {dataset.dataset_id: dataset for dataset in dataset_entities} + dataset_testings = [] + llm = Convertor.convert_llm_config_to_llm() + testing_ids = [] + for dataset_id in dataset_ids: + dataset_entity = dataset_dict.get(dataset_id) + testing_entities = await TestingManager.list_testing_by_dataset_id(dataset_id) + dataset_testing = DatasetTesting( + dataset_id=dataset_entity.id, + dataset_name=dataset_entity.name, + testings=[] + ) + for testing_entity in testing_entities: + testing = Convertor.convert_testing_entity_to_testing(testing_entity) + testing.llm = llm + dataset_testing.testings.append(testing) + testing_ids.append(testing_entity.testing_id) + dataset_testings.append(dataset_testing) + task_entities = await TaskManager.list_current_tasks_by_op_ids(testing_ids) + task_dict = {task.op_id: task for task in task_entities} + task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids( + [task.task_id for task in task_entities] + ) + task_report_dict = {task_report.task_id: task_report for task_report in task_report_entities} + for dataset_testing in dataset_testings: + for testing in dataset_testing.testings: + task_entity = task_dict.get(testing.op_id) + if task_entity: + task_report_entity = task_report_dict.get(task_entity.task_id, None) + task = await Convertor.convert_task_entity_to_task(task_entity, task_report_entity) + testing.task = task + + list_testing_msg = ListTestingMsg( + total=total, + dataset_testings=dataset_testings + ) + return list_testing_msg + except Exception as e: + err = "查询测试失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def list_testcase_by_testing_id(testing_id: uuid.UUID) -> TestingTestCase: + 
"""根据测试ID查询测试用例""" + try: + testcase_entities = await TestCaseManager.list_test_case_by_testing_id(testing_id) + testcases = [] + for testcase_entity in testcase_entities: + testcases.append(Convertor.convert_test_case_entity_to_test_case(testcase_entity)) + testing_entity = await TestingManager.get_testing_by_testing_id(testing_id) + testing_testcase = TestingTestCase( + ave_score=testing_entity.ave_score, + ave_pre=testing_entity.ave_pre, + ave_rec=testing_entity.ave_rec, + ave_fai=testing_entity.ave_fai, + ave_rel=testing_entity.ave_rel, + ave_lcs=testing_entity.ave_lcs, + ave_leve=testing_entity.ave_leve, + ave_jac=testing_entity.ave_jac, + test_cases=testcases + ) + return testing_testcase + except Exception as e: + err = "查询测试用例失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def generate_testing_report_download_url(testing_id: uuid.UUID) -> str: + """生成测试报告下载链接""" + try: + task_entity = await TaskManager.get_current_task_by_op_id(testing_id) + if not task_entity: + raise Exception("任务不存在") + download_url = MinIO.generate_download_link( + bucket_name=IMPORT_DATASET_PATH_IN_MINIO, + object_name=str(task_entity.task_id), + ) + return download_url + except Exception as e: + err = "生成测试报告下载链接失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def create_testing(user_sub: str, req: CreateTestingRequest) -> uuid.UUID: + """创建测试""" + try: + dataset_entity = await DatasetManager.get_dataset_by_dataset_id(req.dataset_id) + testing_entity = await Convertor.convert_create_testing_request_to_testing_entity(user_sub, dataset_entity.team_id, dataset_entity.kb_id, req) + testing_entity = await TestingManager.add_testing(testing_entity) + task_id = await TaskQueueService.init_task(TaskType.TESTING_RUN.value, testing_entity.id) + return task_id + except Exception as e: + err = "创建测试失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def 
run_testing_by_testing_id(testing_id: uuid.UUID, run: bool) -> None: + """运行测试""" + try: + testing_entity = await TestingManager.get_testing_by_testing_id(testing_id) + if run: + if testing_entity.status != TestingStatus.IDLE.value: + return None + task_id = await TaskQueueService.init_task(TaskType.TESTING_RUN.value, testing_entity.id) + else: + task_entity = await TaskManager.get_current_task_by_op_id(testing_id) + if not task_entity: + return None + if task_entity.status != TaskStatus.PENDING.value and task_entity.status != TaskStatus.RUNNING.value: + return None + await TaskQueueService.stop_task(task_entity.id) + except Exception as e: + err = "运行测试失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def update_testing_by_testing_id(testing_id: uuid.UUID, req: UpdateTestingRequest) -> uuid.UUID: + """更新测试""" + try: + testing_dict = await Convertor.convert_update_testing_request_to_dict(req) + testing_entity = await TestingManager.update_testing_by_testing_id(testing_id, testing_dict) + return testing_entity.id + except Exception as e: + err = "更新测试失败" + logging.exception("[TestingService] %s", err) + raise e + + @staticmethod + async def delete_testing_by_testing_ids(testing_ids: list[uuid.UUID]) -> list[uuid.UUID]: + """删除测试""" + try: + task_entities = await TaskManager.list_current_tasks_by_op_ids(testing_ids) + for task_entity in task_entities: + await TaskQueueService.stop_task(task_entity.id) + testing_entities = await TestingManager.update_testing_by_testing_ids(testing_ids, {"status": TestingStatus.DELETED.value}) + testing_ids = [testing_entity.id for testing_entity in testing_entities] + return testing_ids + except Exception as e: + err = "删除测试失败" + logging.exception("[TestingService] %s", err) + raise e diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 94de6de..0b11d46 100644 --- a/data_chain/apps/service/chunk_service.py +++ 
b/data_chain/apps/service/chunk_service.py @@ -1,377 +1,74 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +import aiofiles +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile import uuid -import random -import time -import jieba import traceback -import asyncio - -import jieba.analyse -from data_chain.logger.logger import logger as logging -from data_chain.apps.service.llm_service import get_question_chunk_relation -from data_chain.models.constant import ChunkRelevance -from data_chain.manager.document_manager import DocumentManager, TemporaryDocumentManager +import os +from data_chain.entities.request_data import ( + ListChunkRequest, + UpdateChunkRequest, +) +from data_chain.entities.response_data import ( + Task, + Document, + ListChunkMsg +) +from data_chain.apps.base.convertor import Convertor +from data_chain.apps.service.task_queue_service import TaskQueueService from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.chunk_manager import ChunkManager, TemporaryChunkManager -from data_chain.manager.vector_items_manager import VectorItemsManager, TemporaryVectorItemsManager -from data_chain.exceptions.exception import ChunkException -from data_chain.stores.postgres.postgres import PostgresDB -from data_chain.models.constant import embedding_model_out_dimensions, DocumentEmbeddingConstant -from data_chain.apps.service.embedding_service import Vectorize -from data_chain.config.config import config -from data_chain.apps.base.convertor.chunk_convertor import ChunkConvertor - - -async def _validate_chunk_belong_to_user(user_id: uuid.UUID, chunk_id: uuid.UUID) -> bool: - chunk_entity = await ChunkManager.select_by_chunk_id(chunk_id) - if chunk_entity is None: - raise ChunkException("Chunk not exist") - if chunk_entity.user_id != user_id: - raise ChunkException("Chunk not belong to user") - - -async def list_chunk(params, page_number, page_size): - doc_entity = await 
DocumentManager.select_by_id(params['document_id']) - if doc_entity is None or doc_entity.status == DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING: - return [], 0 - - chunk_entity_list, total = await ChunkManager.select_by_page(params, page_number, page_size) - chunk_dto_list = [] - for chunk_entity in chunk_entity_list: - chunk_dto = ChunkConvertor.convert_entity_to_dto(chunk_entity) - chunk_dto_list.append(chunk_dto) - return (chunk_dto_list, total) - - -async def switch_chunk(id, enabled): - await ChunkManager.update(id, {'enabled': enabled}) - doc_entity = await DocumentManager.select_by_id(id) - if doc_entity is None: - return - kb_entity = await KnowledgeBaseManager.select_by_id(doc_entity.kb_id) - if kb_entity is None: - return - try: - VectorItems = await PostgresDB.get_dynamic_vector_items_table( - kb_entity.vector_items_id, embedding_model_out_dimensions[kb_entity.embedding_model]) - except Exception as e: - raise ChunkException("Failed to get vector items table") - return - await VectorItemsManager.update_by_chunk_id(VectorItems, id, {'enabled': enabled}) - - -async def expand_chunk(document_id, global_offset, expand_method='all', max_tokens=1024, is_temporary_document=False): - # - # 这里返回的ex_chunk_tuple_list是个n*5二维列表 - # 内部的每个列表内容: [id, document_id, global_offset, tokens, text] - # - if is_temporary_document: - ex_chunk_tuple_list = await TemporaryChunkManager.fetch_surrounding_temporary_context(document_id, global_offset, expand_method=expand_method, max_tokens=max_tokens) - else: - ex_chunk_tuple_list = await ChunkManager.fetch_surrounding_context(document_id, global_offset, expand_method=expand_method, max_tokens=max_tokens) - return ex_chunk_tuple_list - - -async def filter_or_expand_chunk_by_llm(kb_id, content, document_para_dict, maxtokens): - exist_chunk_id_set = set() - new_document_para_dict = {} - for document_id in document_para_dict.keys(): - for chunk_tuple in document_para_dict[document_id]: - chunk_id = chunk_tuple[0] - 
exist_chunk_id_set.add(chunk_id) - for document_id in document_para_dict.keys(): - chunk_tuple_list = document_para_dict[document_id] - new_document_para_dict[document_id] = [] - st = 0 - en = 0 - while st < len(chunk_tuple_list): - chunk = '' - tokens = 0 - while en < len(chunk_tuple_list) and (en == st or chunk_tuple_list[en][2]-chunk_tuple_list[en-1][2] == 1): - tokens += chunk_tuple_list[en][3] - chunk += chunk_tuple_list[en][4] - en += 1 - relation = await get_question_chunk_relation(content, chunk) - fisrt_global_offset = chunk_tuple_list[st][2] - last_global_offset = chunk_tuple_list[en-1][2] - ex_chunk_tuple_list = [] - if relation == ChunkRelevance.IRRELEVANT: - if random.random() < 0.5: - ex_chunk_tuple_list = await ChunkManager.find_top_k_similar_chunks(kb_id, content, 1) - else: - kb_entity = await KnowledgeBaseManager.select_by_id(kb_id) - if kb_entity is None: - ex_chunk_tuple_list = [] - else: - embedding_model = kb_entity.embedding_model - vector_items_id = kb_entity.vector_items_id - dim = embedding_model_out_dimensions[embedding_model] - vector_items_table = await PostgresDB.get_dynamic_vector_items_table(vector_items_id, dim) - target_vector = await Vectorize.vectorize_embedding(content) - chunk_id_list = await VectorItemsManager.find_top_k_similar_vectors(vector_items_table, target_vector, kb_id, 1) - chunk_entity_list = await ChunkManager.select_by_chunk_ids(chunk_id_list) - for chunk_entity in chunk_entity_list: - ex_chunk_tuple_list.append((chunk_entity.id, chunk_entity.document_id, - chunk_entity.global_offset, chunk_entity.tokens, chunk_entity.text)) - elif relation == ChunkRelevance.WEAKLY_RELEVANT: - if random.random() < 0.5: - new_document_para_dict[document_id] += chunk_tuple_list[st:en] - elif relation == ChunkRelevance.RELEVANT_BUT_LACKS_PREVIOUS_CONTEXT: - new_document_para_dict[document_id] += chunk_tuple_list[st:en] - ex_chunk_tuple_list = await expand_chunk(document_id, fisrt_global_offset, expand_method='pre', 
max_tokens=maxtokens-tokens) - elif relation == ChunkRelevance.RELEVANT_BUT_LACKS_FOLLOWING_CONTEXT: - new_document_para_dict[document_id] += chunk_tuple_list[st:en] - ex_chunk_tuple_list = await expand_chunk(document_id, last_global_offset, expand_method='nex', max_tokens=maxtokens-tokens) - elif relation == ChunkRelevance.RELEVANT_BUT_LACKS_BOTH_CONTEXTS: - new_document_para_dict[document_id] += chunk_tuple_list[st:en] - ex_chunk_tuple_list = await expand_chunk(document_id, fisrt_global_offset, expand_method='pre', max_tokens=(maxtokens-tokens)//2) - ex_chunk_tuple_list += await expand_chunk(document_id, last_global_offset, expand_method='nex', max_tokens=(maxtokens-tokens)//2) - elif relation == ChunkRelevance.RELEVANT_AND_COMPLETE: - new_document_para_dict[document_id] += chunk_tuple_list[st:en] - for ex_chunk_tuple in ex_chunk_tuple_list: - chunk_id = ex_chunk_tuple[0] - if chunk_id not in exist_chunk_id_set: - new_document_para_dict[document_id].append(ex_chunk_tuple) - exist_chunk_id_set.add(chunk_id) - new_document_para_dict[document_id] = sorted(new_document_para_dict[document_id], key=lambda x: x[2]) - st = en - - -async def get_keywords_from_content(content: str, top_k: int = 3): - words = list(jieba.cut(content)) - keywords = set(jieba.analyse.extract_tags(content, topK=top_k)) - result = [] - exist_words = set() - for word in words: - if word in keywords and word not in exist_words: - exist_words.add(word) - result.append(word) - return result - - -async def rerank_chunks(content: str, chunks: list[str], top_k: int = 3): - pass - - -async def get_keywords_from_chunk(chunk: str, top_k=30): - try: - keywords = jieba.analyse.extract_tags(chunk, topK=top_k, withWeight=True) - except Exception as e: - logging.error(f"get_keywords_from_chunk error due to: {e}") - keywords = [] - return keywords - - -async def get_chunk_tuple(content, temporary_document_ids=None, kb_id=None, topk=3, return_t_cost=False): - # - # 这里返回的chunk_tuple_list是个n*5二维列表 - # 内部的每个列表内容: 
(id, document_id, global_offset, tokens, text) - # - t_cost_dict = {} - st = time.time() - if temporary_document_ids: - chunk_tuple_list = await TemporaryChunkManager.find_top_k_similar_chunks( - temporary_document_ids, - content, - max(topk // 2, 1)) - elif kb_id: - chunk_tuple_list = await ChunkManager.find_top_k_similar_chunks( - kb_id, - content, - max(topk//2, 1)) - else: - return [] - t_cost_dict['keyword_searching'] = time.time()-st - logging.info(f"关键字检索耗时: {time.time()-st}") - try: - st = time.time() - target_vector = await Vectorize.vectorize_embedding(content) - t_cost_dict['text_to_vector'] = time.time()-st - logging.info(f"向量化耗时: {time.time()-st}") - retry_times = 3 - if target_vector is not None: - st = time.time() - if temporary_document_ids: - chunk_id_list = [] - for i in range(retry_times): - try: - chunk_id_list = await asyncio.wait_for(TemporaryVectorItemsManager.find_top_k_similar_temporary_vectors( - target_vector, - temporary_document_ids, - topk-len(chunk_tuple_list) - ), - timeout=1 - ) - break - except Exception as e: - logging.error(f"检索临时向量时出错: {e}") - continue - chunk_entity_list = await TemporaryChunkManager.select_by_temporary_chunk_ids(chunk_id_list) - elif kb_id: - kb_entity = await KnowledgeBaseManager.select_by_id(kb_id) - if kb_entity is None: - return [] - embedding_model = kb_entity.embedding_model - vector_items_id = kb_entity.vector_items_id - dim = embedding_model_out_dimensions[embedding_model] - vector_items_table = await PostgresDB.get_dynamic_vector_items_table(vector_items_id, dim) - chunk_id_list = [] - for i in range(retry_times): - try: - chunk_id_list = await asyncio.wait_for(VectorItemsManager.find_top_k_similar_vectors(vector_items_table, target_vector, kb_id, topk-len(chunk_tuple_list)), timeout=1) - break - except Exception as e: - logging.error(f"检索向量时出错: {e}") - continue - chunk_entity_list = await ChunkManager.select_by_chunk_ids(chunk_id_list) - t_cost_dict['vector_searching'] = time.time()-st - 
logging.info(f"向量化检索耗时: {time.time()-st}") - st = time.time() - for chunk_entity in chunk_entity_list: - chunk_tuple_list.append((chunk_entity.id, chunk_entity.document_id, - chunk_entity.global_offset, chunk_entity.tokens, chunk_entity.text)) - t_cost_dict['vectors_related_texts'] = time.time()-st - logging.info(f"向量化结果关联片段耗时: {time.time()-st}") - if return_t_cost: - return t_cost_dict, chunk_tuple_list - return chunk_tuple_list - except Exception as e: - logging.error(f"片段关联失败: {e}") - return [] - - -async def get_similar_chunks( - content, kb_id=None, temporary_document_ids=None, max_tokens=4096, topk=3, devided_by_document_id=True, - return_t_cost=False): - try: - if return_t_cost: - t_cost_dict, chunk_tuple_list = await get_chunk_tuple(content=content, temporary_document_ids=temporary_document_ids, kb_id=kb_id, topk=topk, return_t_cost=return_t_cost) - else: - chunk_tuple_list = await get_chunk_tuple(content=content, temporary_document_ids=temporary_document_ids, kb_id=kb_id, topk=topk) - st = time.time() - document_para_dict = {} - exist_chunk_id_set = set() - for chunk_tuple in chunk_tuple_list: - document_id = chunk_tuple[1] - if document_id not in document_para_dict.keys(): - document_para_dict[document_id] = [] - if chunk_tuple[0] not in exist_chunk_id_set: - exist_chunk_id_set.add(chunk_tuple[0]) - document_para_dict[document_id].append(chunk_tuple) - logging.info(f"片段整合耗时: {time.time()-st}") - if len(chunk_tuple_list) == 0: - return [] - new_document_para_dict = {} - ex_tokens = max_tokens//len(exist_chunk_id_set) - st = time.time() - leave_ex_tokens = 0 - for document_id in document_para_dict.keys(): - global_offset_set = set() - new_document_para_dict[document_id] = [] - for chunk_tuple in document_para_dict[document_id]: - document_id = chunk_tuple[1] - global_offset = chunk_tuple[2] - tokens = chunk_tuple[3] - leave_ex_tokens += ex_tokens - if temporary_document_ids: - ex_chunk_tuple_list = await expand_chunk(document_id, global_offset, 
expand_method='all', max_tokens=leave_ex_tokens-tokens, is_temporary_document=True) - elif kb_id: - ex_chunk_tuple_list = await expand_chunk(document_id, global_offset, expand_method='all', max_tokens=leave_ex_tokens-tokens) - ex_chunk_tuple_list.append(chunk_tuple) - for ex_chunk_tuple in ex_chunk_tuple_list: - global_offset = ex_chunk_tuple[2] - if global_offset not in global_offset_set: - new_document_para_dict[document_id].append(ex_chunk_tuple) - global_offset_set.add(global_offset) - leave_ex_tokens -= ex_chunk_tuple[3] - if leave_ex_tokens <= 0: - leave_ex_tokens = 0 - new_document_para_dict[document_id] = sorted(new_document_para_dict[document_id], key=lambda x: x[2]) - if return_t_cost: - t_cost_dict['text_expanding'] = time.time()-st - logging.info(f"上下文关联耗时: {time.time()-st}") - # if config['MODEL_ENH']: - # document_para_dict = await filter_or_expand_chunk_by_llm(kb_id, content, new_document_para_dict, ex_tokens) - # else: - # document_para_dict = new_document_para_dict - document_para_dict = new_document_para_dict - if devided_by_document_id: - docuemnt_chunk_list = [] - for document_id in document_para_dict: - document_entity = None - if temporary_document_ids: - document_entity = await TemporaryDocumentManager.select_by_id(document_id) - elif kb_id: - document_entity = await DocumentManager.select_by_id(document_id) - if document_entity is not None: - document_name = document_entity.name - else: - document_name = '' - chunk_list = [] - st = 0 - en = 0 - while st < len(document_para_dict[document_id]): - text = '' - while en < len( - document_para_dict[document_id]) and ( - en == st or document_para_dict[document_id][en][2] - - document_para_dict[document_id][en - 1][2] == - 1): - text += document_para_dict[document_id][en][4] - en += 1 - chunk_list.append(text) - st = en - docuemnt_chunk_list.append({'document_name': document_name, 'chunk_list': chunk_list}) - if return_t_cost: - return t_cost_dict, docuemnt_chunk_list - return docuemnt_chunk_list - 
else: - chunk_list = [] - for document_id in document_para_dict: - st = 0 - en = 0 - while st < len(document_para_dict[document_id]): - text = '' - while en < len( - document_para_dict[document_id]) and ( - en == st or document_para_dict[document_id][en][2] - - document_para_dict[document_id][en - 1][2] == - 1): - text += document_para_dict[document_id][en][4] - en += 1 - chunk_list.append(text) - st = en - if return_t_cost: - return t_cost_dict, chunk_list - return chunk_list - except Exception as e: - logging.error(f"Get similar chun failed due to e: {e}") - logging.error(f"Get similar chun failed due to traceback: {traceback.format_exc()}") - return [] - - -async def get_similar_full_text( - content, kb_id=None, temporary_document_ids=None, topk=3): - try: - chunk_tuple_list = await get_chunk_tuple(content=content, temporary_document_ids=temporary_document_ids, kb_id=kb_id, topk=topk) - full_text_list = [] - document_id_set = set() - for chunk_tuple in chunk_tuple_list: - if chunk_tuple[1] not in document_id_set: - document_id_set.add(chunk_tuple[1]) - if temporary_document_ids: - document_entity = await TemporaryDocumentManager.select_by_id(chunk_tuple[1]) - full_text_list.append(document_entity.full_text) - elif kb_id: - document_entity = await DocumentManager.select_by_id(chunk_tuple[1]) - full_text_list.append(document_entity.full_text) - logging.info(f"Get similar full text success, full_text_list: {full_text_list}") - return full_text_list - except Exception as e: - logging.error(f"Get similar full text failed due to e: {e}") - logging.error(f"Get similar full text failed due to traceback: {traceback.format_exc()}") - return [] +from data_chain.manager.document_type_manager import DocumentTypeManager +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.manager.role_manager import RoleManager +from data_chain.manager.task_manager import TaskManager +from 
data_chain.manager.task_report_manager import TaskReportManager +from data_chain.stores.database.database import DocumentEntity +from data_chain.stores.minio.minio import MinIO +from data_chain.entities.enum import ParseMethod, DataSetStatus, DocumentStatus, TaskType +from data_chain.entities.common import DOC_PATH_IN_OS, DOC_PATH_IN_MINIO, DEFAULt_DOC_TYPE_ID +from data_chain.logger.logger import logger as logging -def split_chunk(chunk): - return list(jieba.cut(str(chunk))) +class ChunkService: + """Chunk Service""" + async def validate_user_action_to_chunk(user_sub: str, chunk_id: uuid.UUID, action: str) -> bool: + """验证用户对分片的操作权限""" + try: + chunk_entity = await ChunkManager.get_chunk_by_chunk_id(chunk_id) + if chunk_entity is None: + err = f"分片不存在,分片ID: {chunk_id}" + logging.error("[ChunkService] %s", err) + return False + action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action( + user_sub, chunk_entity.team_id, action) + if action_entity is None: + return False + return True + except Exception as e: + err = "验证用户对分片的操作权限失败" + logging.exception("[ChunkService] %s", err) + raise e + + async def list_chunks_by_document_id(req: ListChunkRequest) -> ListChunkMsg: + """根据文档ID列出分片""" + try: + total, chunk_entities = await ChunkManager.list_chunk(req) + chunks = [] + for chunk_entity in chunk_entities: + chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + chunks.append(chunk) + return ListChunkMsg(total=total, chunks=chunks) + except Exception as e: + err = "根据文档ID列出分片失败" + logging.exception("[ChunkService] %s", err) + raise e + + async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> bool: + try: + chunk_dict = await Convertor.convert_update_chunk_request_to_dict(req) + await ChunkManager.update_chunk_by_chunk_id(chunk_id, chunk_dict) + return True + except Exception as e: + err = "更新分片失败" + logging.exception("[ChunkService] %s", err) + raise e diff --git a/data_chain/apps/service/dataset_service.py 
b/data_chain/apps/service/dataset_service.py new file mode 100644 index 0000000..456f183 --- /dev/null +++ b/data_chain/apps/service/dataset_service.py @@ -0,0 +1,337 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +import aiofiles +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile +import uuid +import traceback +import os +from data_chain.entities.request_data import ( + ListDatasetRequest, + ListDataInDatasetRequest, + CreateDatasetRequest, + UpdateDatasetRequest, + UpdateDataRequest +) +from data_chain.entities.response_data import ( + Task, + Document, + ListDatasetMsg, + ListDataInDatasetMsg +) +from data_chain.apps.base.convertor import Convertor +from data_chain.apps.service.task_queue_service import TaskQueueService +from data_chain.manager.knowledge_manager import KnowledgeBaseManager +from data_chain.manager.task_manager import TaskManager +from data_chain.manager.task_report_manager import TaskReportManager +from data_chain.manager.dataset_manager import DatasetManager +from data_chain.manager.qa_manager import QAManager +from data_chain.manager.testing_manager import TestingManager +from data_chain.manager.team_manager import TeamManager +from data_chain.manager.role_manager import RoleManager +from data_chain.stores.minio.minio import MinIO +from data_chain.entities.enum import ParseMethod, DataSetStatus, DocumentStatus, TaskType, TaskStatus +from data_chain.entities.common import IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO +from data_chain.stores.database.database import DataSetEntity +from data_chain.logger.logger import logger as logging + + +class DataSetService: + """数据集服务""" + @staticmethod + async def validate_user_action_to_dataset( + user_sub: str, dataset_id: uuid.UUID, action: str) -> bool: + """验证用户在数据集中的操作权限""" + try: + dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id) + if dataset_entity is None: + logging.exception("[DataSetService] 数据集不存在") + 
raise Exception("Dataset not exist") + action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action( + user_sub, dataset_entity.team_id, action) + if action_entity is None: + return False + return True + except Exception as e: + err = "验证用户在数据集中的操作权限失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def validate_user_action_to_data( + user_sub: str, data_id: uuid.UUID, action: str) -> bool: + """验证用户在数据中的操作权限""" + try: + data_entity = await QAManager.get_data_by_data_id(data_id) + if data_entity is None: + logging.exception("[DataSetService] 数据不存在") + raise Exception("Data not exist") + dataset_entity = await DatasetManager.get_dataset_by_dataset_id(data_entity.dataset_id) + action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action( + user_sub, dataset_entity.team_id, action) + if action_entity is None: + return False + return True + except Exception as e: + err = "验证用户在数据中的操作权限失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def list_dataset_by_kb_id(req: ListDatasetRequest) -> ListDatasetMsg: + """根据知识库ID列出数据集""" + try: + total, dataset_entities = await DatasetManager.list_dataset(req) + data_ids = [dataset_entity.id for dataset_entity in dataset_entities] + llm = await Convertor.convert_llm_config_to_llm() + task_entities = await TaskManager.list_current_tasks_by_op_ids(data_ids) + task_dict = {task_entity.op_id: task_entity for task_entity in task_entities} + task_ids = [task_entity.id for task_entity in task_entities] + task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids(task_ids) + task_report_dict = {task_report_entity.task_id: task_report_entity + for task_report_entity in task_report_entities} + for dataset_entity in dataset_entities: + task_entity = task_dict.get(dataset_entity.id, None) + task_report = task_report_dict.get(task_entity.id, None) if task_entity else None + dataset = await 
Convertor.convert_dataset_entity_to_dataset(dataset_entity, task_report) + dataset.llm = llm + dataset.task = task_entity + return ListDatasetMsg(total=total, datasets=dataset_entities) + except Exception as e: + err = "根据知识库ID列出数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def list_data_in_dataset(req: ListDatasetRequest) -> ListDataInDatasetMsg: + """根据数据集ID列出数据""" + try: + total, qa_entities = await QAManager.list_data_in_dataset(req) + datas = [] + for qa_entity in qa_entities: + data = await Convertor.convert_qa_entity_to_data(qa_entity) + datas.append(data) + return ListDataInDatasetMsg(total=total, datas=datas) + except Exception as e: + err = "根据数据集ID列出数据失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def is_dataset_have_testing(dataset_id: uuid.UUID) -> bool: + """判断数据集是否有测试数据""" + try: + dataset_entity = await TestingManager.list_testing_by_dataset_id(dataset_id) + if dataset_entity is None: + return False + return True + except Exception as e: + err = "判断数据集是否有测试数据失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def generate_dataset_download_url(task_id: uuid.UUID) -> str: + """生成数据集下载链接""" + try: + download_url = await MinIO.generate_download_link( + bucket_name=IMPORT_DATASET_PATH_IN_MINIO, + object_name=str(task_id), + ) + return download_url + except Exception as e: + err = "生成数据集下载链接失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def create_dataset(user_sub: str, req: CreateDatasetRequest) -> uuid.UUID: + """创建数据集""" + try: + kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(req.kb_id) + if kb_entity is None: + err = "知识库不存在" + logging.exception("[DataSetService] %s", err) + raise Exception(err) + dataset_entity = await Convertor.convert_create_dataset_request_to_dataset_entity(user_sub, kb_entity.id, req) + await DatasetManager.add_dataset(dataset_entity) + 
dataset_doc_entities = [] + for doc_id in req.document_ids: + dataset_doc_entity = await Convertor.convert_dataset_id_and_doc_id_to_dataset_doc_entity( + dataset_entity.id, doc_id) + dataset_doc_entities.append(dataset_doc_entity) + await DatasetManager.add_dataset_docs(dataset_doc_entities) + task_id = await TaskQueueService.init_task(TaskType.DATASET_GENERATE.value, dataset_entity.id) + return task_id + except Exception as e: + err = "创建数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def import_dataset( + user_sub: str, kb_id: uuid.UUID, dataset_packages: list[UploadFile] = File(...)) -> uuid.UUID: + """导入数据集""" + try: + if len(dataset_packages.filename) > 10: + err = "数据集包名称过长" + logging.exception("[DataSetService] %s", err) + raise Exception(err) + dataset_package_sz = 0 + for dataset_package in dataset_packages: + dataset_package_sz += dataset_package.size + if dataset_package_sz > 1024 * 1024 * 1024: + err = "数据集包大小超过限制" + logging.exception("[DataSetService] %s", err) + raise Exception(err) + kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id) + if kb_entity is None: + err = "知识库不存在" + logging.exception("[DataSetService] %s", err) + raise Exception(err) + dataset_import_task_ids = [] + for dataset_package in dataset_packages: + tmp_path = os.join(IMPORT_DATASET_PATH_IN_OS, str(uuid.uuid4())) + file_name = dataset_package.filename + file_path = os.join(tmp_path, file_name) + try: + async with aiofiles.open(file_path, 'wb') as out_file: + content = await dataset_package.read() + await out_file.write(content) + except Exception as e: + err = "保存数据集包失败" + logging.exception("[DataSetService] %s", err) + raise e + try: + dataset_entity = DataSetEntity( + team_id=kb_entity.team_id, + kb_id=kb_entity.id, + author_id=user_sub, + author_name=user_sub, + llm_id=None, + name=file_name, + description="", + data_cnt=0, + is_data_cleared=False, + is_chunk_related=False, + status=DataSetStatus.IDLE.value, + 
score=-1 + ) + await DatasetManager.add_dataset(dataset_entity) + except Exception as e: + err = "创建数据集失败" + logging.exception("[DataSetService] %s", err) + continue + + try: + await MinIO.put_object( + bucket_name=IMPORT_DATASET_PATH_IN_MINIO, + object_name=dataset_entity.id, + file_path=file_path + ) + except Exception as e: + err = "上传数据集包到minio失败" + logging.exception("[DataSetService] %s", err) + continue + task_id = await TaskQueueService.init_task(TaskType.DATASET_IMPORT.value, dataset_entity.id) + if task_id: + dataset_import_task_ids.append(task_id) + else: + err = "初始化任务失败" + logging.exception("[DataSetService] %s", err) + continue + return dataset_import_task_ids + except Exception as e: + err = "导入数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def export_dataset(dataset_ids: list[uuid.UUID]) -> uuid.UUID: + """导出数据集""" + try: + dataset_export_task_ids = [] + for dataset_id in dataset_ids: + task_id = await TaskQueueService.init_task(TaskType.DATASET_EXPORT.value, dataset_entity.id) + if task_id: + dataset_export_task_ids.append(task_id) + else: + err = "初始化任务失败" + logging.exception("[DataSetService] %s", err) + raise Exception(err) + return dataset_export_task_ids + except Exception as e: + err = "导出数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def generate_dataset_by_id(dataset_id: uuid.UUID, generate: bool) -> uuid.UUID: + """生成数据集""" + try: + dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id) + if generate: + if dataset_entity.status == DataSetStatus.IDLE.value: + task_id = await TaskQueueService.init_task(TaskType.DATASET_GENERATE.value, dataset_id) + else: + if dataset_entity.status == DataSetStatus.PENDING.value or dataset_entity.status == DataSetStatus.GENERATING.value: + task_entity = await TaskManager.get_current_task_by_op_id(dataset_id) + task_id = await TaskQueueService.stop_task(task_entity.id) + return task_id + except Exception 
as e: + err = "生成数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def update_dataset_by_dataset_id( + dataset_id: uuid.UUID, req: UpdateDatasetRequest) -> uuid.UUID: + """更新数据集""" + try: + dataset_entity = await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"name": req.dataset_name, "description": req.description}) + return dataset_entity.id + except Exception as e: + err = "更新数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def update_data(data_id: uuid.UUID, req: UpdateDataRequest) -> uuid.UUID: + """更新数据""" + try: + qa_entity = await QAManager.update_qa_by_qa_id(data_id, {"question": req.question, "answer": req.answer}) + return qa_entity.id + except Exception as e: + err = "更新数据失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def delete_dataset_by_dataset_ids( + dataset_ids: list[uuid.UUID]) -> list[uuid.UUID]: + """根据数据集ID删除数据集""" + try: + task_entities = await TaskManager.list_current_tasks_by_op_ids(dataset_ids) + for task_entity in task_entities: + if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value: + await TaskQueueService.stop_task(task_entity.id) + dataset_entities = await DatasetManager.update_dataset_by_dataset_ids( + dataset_ids, {"status": DataSetStatus.DELETED.value}) + dataset_ids = [dataset_entity.id for dataset_entity in dataset_entities] + return dataset_ids + except Exception as e: + err = "删除数据集失败" + logging.exception("[DataSetService] %s", err) + raise e + + @staticmethod + async def delete_data_by_data_ids( + data_ids: list[uuid.UUID]) -> list[uuid.UUID]: + """根据数据ID删除数据""" + try: + data_entities = await QAManager.update_qa_by_qa_ids( + data_ids, {"status": DocumentStatus.DELETED.value}) + data_ids = [data_entity.id for data_entity in data_entities] + return data_ids + except Exception as e: + err = "删除数据失败" + logging.exception("[DataSetService] %s", err) + 
raise e diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 556e822..7f7fc8a 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -1,422 +1,240 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +import aiofiles +from fastapi import APIRouter, Depends, Query, Body, File, UploadFile import uuid import traceback -from typing import Dict, List, Tuple -from fastapi import File, UploadFile import os -import shutil -import secrets -from data_chain.logger.logger import logger as logging -from data_chain.apps.base.convertor.task_convertor import TaskConvertor -from data_chain.apps.base.task.document_task_handler import DocumentTaskHandler -from data_chain.apps.base.task.task_handler import TaskRedisHandler, TaskHandler +from data_chain.entities.request_data import ( + ListDocumentRequest, + UpdateDocumentRequest +) +from data_chain.entities.response_data import ( + Task, + Document, + ListDocumentMsg, + ListDocumentResponse +) +from data_chain.apps.base.convertor import Convertor +from data_chain.apps.service.task_queue_service import TaskQueueService from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.document_manager import DocumentManager, TemporaryDocumentManager from data_chain.manager.document_type_manager import DocumentTypeManager -from data_chain.manager.chunk_manager import ChunkManager, TemporaryChunkManager -from data_chain.manager.vector_items_manager import VectorItemsManager, TemporaryVectorItemsManager -from data_chain.manager.task_manager import TaskManager, TaskStatusReportManager -from data_chain.models.service import DocumentDTO, TemporaryDocumentDTO, TaskDTO -from data_chain.apps.service.embedding_service import Vectorize -from data_chain.models.constant import DocumentEmbeddingConstant, OssConstant, TaskConstant, TaskActionEnum, \ - 
ParseExtensionEnum,TemporaryDocumentStatusEnum +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.role_manager import RoleManager +from data_chain.manager.task_manager import TaskManager +from data_chain.manager.task_report_manager import TaskReportManager +from data_chain.stores.database.database import DocumentEntity from data_chain.stores.minio.minio import MinIO -from data_chain.apps.base.convertor.document_convertor import DocumentConvertor -from data_chain.exceptions.exception import DocumentException, KnowledgeBaseException -from data_chain.models.constant import DocumentEmbeddingConstant, embedding_model_out_dimensions -from data_chain.stores.postgres.postgres import PostgresDB, DocumentEntity, TemporaryDocumentEntity, TaskEntity -from data_chain.config.config import config - - -async def _validate_doucument_belong_to_user(user_id, document_id) -> bool: - document_entity = await DocumentManager.select_by_id(document_id) - if document_entity is None: - raise DocumentException("Document not exist") - if document_entity.user_id != user_id: - raise DocumentException("Document not belong to user") - - -async def list_documents_by_knowledgebase_id(params, page_number, page_size) -> Tuple[List[DocumentDTO], int]: - result_list = [] - try: - total, document_entity_list = await DocumentManager.select_by_page(params, page_number, page_size) - doc_ids=[] - doc_type_ids=[] - for document_entity in document_entity_list: - doc_ids.append(document_entity.id) - doc_type_ids.append(document_entity.type_id) - doc_type_ids = list(set(doc_type_ids)) - document_type_entity_list = await DocumentTypeManager.select_by_ids(doc_type_ids) - task_entity_list = await TaskManager.select_latest_task_by_op_ids(doc_ids) - task_ids=[] - for task_entity in task_entity_list: - task_ids.append(task_entity.id) - task_report_entity_list=await TaskStatusReportManager.select_latest_report_by_task_ids(task_ids) - document_type_dict={} - for 
document_type_entity in document_type_entity_list: - document_type_dict[document_type_entity.id]=document_type_entity - task_dict={} - for task_entity in task_entity_list: - task_dict[task_entity.op_id]=task_entity - task_report_dict={} - for task_report_entity in task_report_entity_list: - task_report_dict[task_report_entity.task_id]=task_report_entity - for document_entity in document_entity_list: - document_type_entity=document_type_dict[document_entity.type_id] - doc_dto = DocumentConvertor.convert_entity_to_dto(document_entity, document_type_entity) - task_entity=task_dict.get(document_entity.id,None) - task_dto=None - if task_entity is not None: - task_report_entity=task_report_dict.get(task_entity.id,None) - task_report_entity_list=[] - if task_report_entity is not None: - task_report_entity_list=[task_report_entity] - task_dto = TaskConvertor.convert_entity_to_dto(task_entity, task_report_entity_list) - if task_dto is not None: - doc_dto.task = task_dto - result_list.append(doc_dto) - return (result_list, total) - except Exception as e: - logging.error("List document by kb_id={} error: {}".format(params['kb_id'], e)) - raise e - - -async def update_document(tmp_dict) -> DocumentDTO: - try: - old_document_entity = await DocumentManager.select_by_id(tmp_dict['id']) - if 'type_id' in tmp_dict: - document_type_entity = await DocumentTypeManager.select_by_id(tmp_dict['type_id']) - if document_type_entity.kb_id is not None and old_document_entity.kb_id != document_type_entity.kb_id: - raise KnowledgeBaseException("Update document error.") - await DocumentManager.update(tmp_dict['id'], tmp_dict) - - new_document_entity = await DocumentManager.select_by_id(tmp_dict['id']) - document_type_entity = await DocumentTypeManager.select_by_id(new_document_entity.type_id) - return DocumentConvertor.convert_entity_to_dto(new_document_entity, document_type_entity) - except Exception as e: - logging.error("Update document error: {}".format(e)) - raise 
KnowledgeBaseException("Update document error.") - - -async def stop_document_parse_task(doc_id: uuid.UUID) -> TaskDTO: - try: - doc_entity = await DocumentManager.select_by_id(doc_id) - if doc_entity.status == DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING: - return - task_entity = await TaskManager.select_by_op_id(doc_id) - task_id = task_entity.id - await TaskHandler.restart_or_clear_task(task_id, TaskActionEnum.CANCEL) - except Exception as e: - logging.error("Stop docuemnt parse task={} error: {}".format(task_id, e)) - - -async def init_document_parse_task(doc_id): - update_dict = {'status': DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING} - document_entity = await DocumentManager.update(doc_id, update_dict) - - # 写入task记录 - if document_entity is None: - return False - task_entity = await TaskManager.insert(TaskEntity(user_id=document_entity.user_id, - op_id=doc_id, - type=TaskConstant.PARSE_DOCUMENT, - retry=0, - status=TaskConstant.TASK_STATUS_PENDING)) - # 提交redis任务队列 - TaskRedisHandler.put_task_by_tail(config['REDIS_PENDING_TASK_QUEUE_NAME'], str(task_entity.id)) - return True - - -async def run_document(update_dict) -> DocumentDTO: - try: - doc_id = update_dict['id'] - run = update_dict['run'] - if run == DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_RUN: - await init_document_parse_task(doc_id) - elif run == DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_CANCEL: - await stop_document_parse_task(doc_id) - updated_document_entity = await DocumentManager.select_by_id(doc_id) - document_type_entity = await DocumentTypeManager.select_by_id(updated_document_entity.type_id) - return DocumentConvertor.convert_entity_to_dto(updated_document_entity, document_type_entity) - except Exception as e: - logging.error("Embedding document ({}) error: {}".format(update_dict['run'], e)) - raise DocumentException(f"Embedding document ({update_dict['run']}) error.") - - -async def switch_document(document_id, enabled) -> DocumentDTO: - try: - await 
DocumentManager.update(document_id, {'enabled': enabled}) - - updated_document_entity = await DocumentManager.select_by_id(document_id) - document_type_entity = await DocumentTypeManager.select_by_id(updated_document_entity.type_id) - return DocumentConvertor.convert_entity_to_dto(updated_document_entity, document_type_entity) - except Exception as e: - logging.error("Switch document status ({}) error: {}".format(enabled, e)) - raise DocumentException(f"Switch document status ({enabled}) error.") - - -async def delete_document(ids: List[uuid.UUID]) -> int: - if len(ids) == 0: - return 0 - try: - # 删除document表的记录 - deleted_document_entity_list = await DocumentManager.select_by_ids(ids) - for deleted_document_entity in deleted_document_entity_list: - # 删除document_type表的记录 - if deleted_document_entity.status == DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_RUNNING: - await stop_document_parse_task(deleted_document_entity.id) - await TaskManager.update_task_by_op_id(deleted_document_entity.id, {'status': TaskConstant.TASK_STATUS_DELETED}) - # 同步删除minIO里面的文件 - for deleted_document_entity in deleted_document_entity_list: - await MinIO.delete_object(OssConstant.MINIO_BUCKET_DOCUMENT, str(deleted_document_entity.id)) - deleted_cnt = await DocumentManager.delete_by_ids(ids) - # 修改kb里面的文档数量和文档大小 - knowledge_base_entity = await KnowledgeBaseManager.select_by_id(deleted_document_entity_list[0].kb_id) - total_cnt, total_sz = await DocumentManager.select_cnt_and_sz_by_kb_id(knowledge_base_entity.id) - update_dict = {'document_number': total_cnt, - 'document_size': total_sz} - await KnowledgeBaseManager.update(knowledge_base_entity.id, update_dict) - return deleted_cnt - except Exception as e: - logging.error(f"Delete document ({ids}) error: {e}") - raise DocumentException(f"Delete document ({ids}) error.") +from data_chain.entities.enum import ParseMethod, DataSetStatus, DocumentStatus, TaskType +from data_chain.entities.common import DOC_PATH_IN_OS, DOC_PATH_IN_MINIO, 
class DocumentService:
    """Service layer for document operations.

    All methods are static coroutines. Persistence is delegated to the
    ``*Manager`` classes, object storage to ``MinIO`` and background work
    to ``TaskQueueService``.
    """

    @staticmethod
    async def validate_user_action_to_document(user_sub: str, doc_id: uuid.UUID, action: str) -> bool:
        """Return whether ``user_sub`` may perform ``action`` on a document.

        Returns ``False`` when the document does not exist or when
        ``RoleManager`` finds no matching action for the user in the
        document's team. Lookup errors are logged and re-raised.
        """
        try:
            doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
            if doc_entity is None:
                err = f"文档不存在, 文档ID: {doc_id}"
                logging.error("[DocumentService] %s", err)
                return False
            action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action(
                user_sub, doc_entity.team_id, action)
            return action_entity is not None
        except Exception:
            err = "验证用户对文档的操作权限失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def list_doc(req: ListDocumentRequest) -> ListDocumentMsg:
        """List documents matching ``req`` together with their current task.

        Each returned document carries its current task (when one exists)
        and that task's current report (when one exists).
        """
        try:
            total, doc_entities = await DocumentManager.list_document(req)
            doc_ids = [doc_entity.id for doc_entity in doc_entities]
            task_entities = await TaskManager.list_current_tasks_by_op_ids(doc_ids)
            task_ids = [task_entity.id for task_entity in task_entities]
            task_dict = {task_entity.op_id: task_entity for task_entity in task_entities}
            task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids(task_ids)
            task_report_dict = {
                task_report_entity.task_id: task_report_entity
                for task_report_entity in task_report_entities
            }
            # Many documents share a type; fetch each type only once per call.
            doc_type_cache = {}
            documents = []
            for doc_entity in doc_entities:
                type_id = doc_entity.type_id
                if type_id not in doc_type_cache:
                    doc_type_cache[type_id] = await DocumentTypeManager.get_document_type_by_id(type_id)
                document = await Convertor.convert_document_entity_and_document_type_entity_to_document(
                    doc_entity, doc_type_cache[type_id])
                task_entity = task_dict.get(doc_entity.id)
                if task_entity is not None:
                    # Reports are keyed by task id, not by the task entity itself.
                    task_report = task_report_dict.get(task_entity.id)
                    document.task = await Convertor.convert_task_entity_to_task(task_entity, task_report)
                documents.append(document)
            return ListDocumentMsg(total=total, documents=documents)
        except Exception:
            err = "列出文档失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def generate_doc_download_url(doc_id: uuid.UUID) -> str:
        """Return a MinIO download link for the object stored under ``doc_id``."""
        try:
            # NOTE(review): KnowledgeBaseService calls the same MinIO helper with
            # ``object_name=`` instead of ``file_name=``; confirm which keyword
            # MinIO.generate_download_link actually accepts.
            return await MinIO.generate_download_link(
                bucket_name=DOC_PATH_IN_MINIO,
                file_name=str(doc_id))
        except Exception:
            err = "生成文档下载链接失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def get_doc_name_and_extension(doc_id: uuid.UUID) -> tuple[str, str]:
        """Return ``(name, extension)`` of a document.

        Raises:
            ValueError: if no document with ``doc_id`` exists.
        """
        try:
            doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
            if doc_entity is None:
                err = f"获取文档失败, 文档ID: {doc_id}"
                logging.error("[DocumentService] %s", err)
                raise ValueError(err)
            return doc_entity.name, doc_entity.extension
        except Exception:
            err = "获取文档名称和扩展名失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def get_doc_report(doc_id: uuid.UUID) -> str:
        """Return the textual report of the document's current task.

        One line is emitted per task report entity; an empty string is
        returned when the document has no current task.

        Raises:
            ValueError: if no document with ``doc_id`` exists.
        """
        try:
            doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
            if doc_entity is None:
                err = f"获取文档报告失败, 文档ID: {doc_id}"
                logging.error("[DocumentService] %s", err)
                raise ValueError(err)
            task_entity = await TaskManager.get_current_task_by_op_id(doc_id)
            if task_entity is None:
                return ''
            task_report_entities = await TaskReportManager.list_all_task_report_by_task_id(task_entity.id)
            return ''.join(
                f"任务报告ID: {task_report_entity.id}, "
                f"任务报告内容: {task_report_entity.content}, "
                f"任务报告创建时间: {task_report_entity.created_time}\n"
                for task_report_entity in task_report_entities
            )
        except Exception:
            err = "获取文档报告失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def upload_docs(user_sub: str, kb_id: uuid.UUID, docs: list[UploadFile]) -> list[uuid.UUID]:
        """Store uploaded files, persist their records and queue parse tasks.

        Each file is staged under ``DOC_PATH_IN_OS``, pushed to MinIO under a
        fresh UUID and turned into a ``DocumentEntity``. Entities are inserted
        in batches; a parse task is queued only for documents whose batch was
        inserted successfully. Files that fail to stage or upload are logged
        and skipped.

        Returns:
            The ids of the documents that were persisted.

        Raises:
            ValueError: if the knowledge base does not exist.
        """
        kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
        if kb_entity is None:
            err = f"知识库不存在, 知识库ID: {kb_id}"
            logging.error("[DocumentService] %s", err)
            raise ValueError(err)

        doc_entities = []
        for doc in docs:
            try:
                # The file name is client-controlled: drop any directory part so
                # the staging path cannot escape DOC_PATH_IN_OS.
                file_name = os.path.basename(doc.filename)
                document_file_path = os.path.join(DOC_PATH_IN_OS, file_name)
                content = await doc.read()
                async with aiofiles.open(document_file_path, "wb") as f:
                    await f.write(content)
                doc_id = uuid.uuid4()
                # NOTE(review): the return value of put_object is ignored here;
                # confirm whether it signals failure by returning a falsy value.
                await MinIO.put_object(
                    bucket_name=DOC_PATH_IN_MINIO,
                    file_index=str(doc_id),
                    file_path=document_file_path
                )
                doc_entities.append(DocumentEntity(
                    id=doc_id,
                    team_id=kb_entity.team_id,
                    kb_id=kb_entity.id,
                    author_id=user_sub,
                    author_name=user_sub,
                    name=file_name,
                    # splitext yields '' for names without a dot instead of the whole name.
                    extension=os.path.splitext(file_name)[1].lstrip('.'),
                    size=len(content),
                    parse_method=kb_entity.default_parse_method,
                    parse_relut_topology=None,
                    chunk_size=kb_entity.default_chunk_size,
                    type_id=DEFAULt_DOC_TYPE_ID,
                    enabled=True,
                    # parse_docs compares against DocumentStatus, so start from it too.
                    status=DocumentStatus.IDLE.value,
                    full_text='',
                    abstract='',
                    abstract_vector=None
                ))
            except Exception as e:
                err = f"上传文档失败, 文档名: {doc.filename}, 错误信息: {e}"
                logging.error("[DocumentService] %s", err)
                continue

        batch_size = 1024
        persisted_entities = []
        # range() always advances, so a failing batch cannot stall the loop.
        for start in range(0, len(doc_entities), batch_size):
            batch = doc_entities[start:start + batch_size]
            try:
                await DocumentManager.add_documents(batch)
            except Exception as e:
                err = f"上传文档失败, 文档数量: {len(batch)}, 错误信息: {e}"
                logging.error("[DocumentService] %s", err)
                continue
            persisted_entities.extend(batch)

        for doc_entity in persisted_entities:
            await TaskQueueService.init_task(TaskType.DOC_PARSE.value, doc_entity.id)
        return [doc_entity.id for doc_entity in persisted_entities]

    @staticmethod
    async def parse_docs(doc_ids: list[uuid.UUID], parse: bool) -> list[uuid.UUID]:
        """Start (``parse=True``) or stop (``parse=False``) parsing of documents.

        Starting is attempted only for documents whose status is IDLE;
        stopping only for documents that are PENDING or RUNNING. Unknown
        document ids are logged and skipped.

        Returns:
            The ids for which the task queue reported a task id.
        """
        try:
            doc_ids_success = []
            stoppable_statuses = (DocumentStatus.PENDING.value, DocumentStatus.RUNNING.value)
            for doc_id in doc_ids:
                doc_entity = await DocumentManager.get_document_by_doc_id(doc_id)
                if doc_entity is None:
                    err = f"文档不存在, 文档ID: {doc_id}"
                    logging.error("[DocumentService] %s", err)
                    continue
                if parse:
                    if doc_entity.status != DocumentStatus.IDLE.value:
                        continue
                    task_id = await TaskQueueService.init_task(TaskType.DOC_PARSE.value, doc_id)
                else:
                    if doc_entity.status not in stoppable_statuses:
                        continue
                    # NOTE(review): delete_docs_by_ids passes a task id to stop_task
                    # while this call passes the document id; confirm which one
                    # TaskQueueService.stop_task expects.
                    task_id = await TaskQueueService.stop_task(doc_id)
                if task_id:
                    doc_ids_success.append(doc_id)
            return doc_ids_success
        except Exception:
            err = "解析文档失败"
            logging.exception("[DocumentService] %s", err)
            raise

    @staticmethod
    async def update_doc(doc_id: uuid.UUID, req: UpdateDocumentRequest) -> uuid.UUID:
        """Apply the fields of ``req`` to the document and return ``doc_id``."""
        doc_dict = await Convertor.convert_update_document_request_to_dict(req)
        await DocumentManager.update_document_by_doc_id(doc_id, doc_dict)
        return doc_id

    @staticmethod
    async def delete_docs_by_ids(doc_ids: list[uuid.UUID]) -> list[uuid.UUID]:
        """Stop the documents' current tasks and mark the documents as DELETED.

        Returns:
            The ids of the entities returned by the status update.
        """
        try:
            task_entities = await TaskManager.list_current_tasks_by_op_ids(doc_ids)
            for task_entity in task_entities:
                await TaskQueueService.stop_task(task_entity.id)
            doc_entities = await DocumentManager.update_document_by_doc_ids(
                doc_ids, {"status": DocumentStatus.DELETED.value})
            return [doc_entity.id for doc_entity in doc_entities]
        except Exception:
            err = "删除文档失败"
            logging.exception("[DocumentService] %s", err)
            raise
default_roles, IMPORT_KB_PATH_IN_OS, EXPORT_KB_PATH_IN_MINIO, IMPORT_KB_PATH_IN_MINIO +from data_chain.stores.database.database import TeamEntity, KnowledgeBaseEntity, DocumentTypeEntity +from data_chain.stores.minio.minio import MinIO +from data_chain.apps.base.convertor import Convertor +from data_chain.manager.team_manager import TeamManager from data_chain.manager.knowledge_manager import KnowledgeBaseManager from data_chain.manager.document_type_manager import DocumentTypeManager -from data_chain.manager.task_manager import TaskManager, TaskStatusReportManager -from data_chain.models.service import KnowledgeBaseDTO, TaskDTO -from data_chain.models.constant import OssConstant, TaskConstant -from data_chain.stores.minio.minio import MinIO -from data_chain.stores.postgres.postgres import TaskEntity -from data_chain.exceptions.exception import KnowledgeBaseException -from data_chain.apps.service.document_service import run_document -from data_chain.models.constant import DocumentEmbeddingConstant, OssConstant, TaskConstant, KnowledgeStatusEnum, TaskActionEnum, EmbeddingModelEnum, ParseMethodEnum, embedding_model_out_dimensions -from data_chain.apps.base.document.zip_handler import ZipHandler -from data_chain.stores.postgres.postgres import KnowledgeBaseEntity -from data_chain.config.config import config - - -async def _validate_knowledge_base_belong_to_user(user_id: uuid.UUID, kb_id: str) -> bool: - kb_entity = await KnowledgeBaseManager.select_by_id(kb_id) - if kb_entity is None: - raise KnowledgeBaseException("Knowledge base not exist") - if kb_entity.user_id != user_id: - raise KnowledgeBaseException("Knowledge base not belong to user") - - -async def list_knowledge_base_task(page_number, page_size, params) -> List[TaskDTO]: - try: - # 直接查询task记录表 - total, all_task_list = await TaskManager.select_by_page(page_number, page_size, params) - - knowledge_dto_list = [] - for task_entity in all_task_list: - if not task_entity.op_id: - continue - knowledge_base_entity 
= await KnowledgeBaseManager.select_by_id(task_entity.op_id) - if knowledge_base_entity is None: - continue - document_type_entity_list = await DocumentTypeManager.select_by_knowledge_base_id(str(knowledge_base_entity.id)) - knowledge_base_dto = KnowledgeConvertor.convert_entity_to_dto( - knowledge_base_entity, document_type_entity_list) - latest_task_status_report_entity_list = await TaskStatusReportManager.select_latest_report_by_task_ids([task_entity.id]) - if len(latest_task_status_report_entity_list) >= 1: - task_dto = TaskConvertor.convert_entity_to_dto(task_entity, latest_task_status_report_entity_list) - else: - task_dto = TaskConvertor.convert_entity_to_dto(task_entity, []) - knowledge_base_dto.task = task_dto - knowledge_dto_list.append(knowledge_base_dto) - return total, knowledge_dto_list - except Exception: - logging.error("List user={} knowledge base task error: {}".format(params['user_id'], traceback.format_exc())) - raise KnowledgeBaseException(f"List user={str(params['user_id'])} knowledge base task error.") - - -async def rm_all_knowledge_base_task(user_id: uuid.UUID, types: List[str]) -> TaskDTO: - try: - task_entity_list = await TaskManager.select_by_user_id_and_task_type_list(user_id, types) - for task_entity in task_entity_list: - task_id = task_entity.id - if task_entity.status == TaskConstant.TASK_STATUS_PENDING or task_entity.status == TaskConstant.TASK_STATUS_RUNNING: - await stop_knowledge_base_task(task_id) - await TaskManager.update(task_id, {'status': TaskConstant.TASK_STATUS_DELETED}) - return True - except Exception as e: - logging.error("Stop knowledge base task={} error: {}".format(task_id, e)) - raise KnowledgeBaseException(f"Stop knowledge base task={task_id} error.") - - -async def rm_knowledge_base_task(task_id: uuid.UUID) -> TaskDTO: - try: - task_entity = await TaskManager.select_by_id(task_id) - if task_entity is None: - return - if task_entity.status == TaskConstant.TASK_STATUS_PENDING or task_entity.status == 
class KnowledgeBaseService:
    """Service layer for knowledge base operations."""

    @staticmethod
    async def validate_user_action_to_knowledge_base(
            user_sub: str, kb_id: uuid.UUID, action: str) -> bool:
        """Return whether ``user_sub`` may perform ``action`` on a knowledge base.

        Returns ``False`` when ``RoleManager`` finds no matching action for
        the user in the knowledge base's team.

        Raises:
            Exception: if the knowledge base does not exist, or if a lookup fails.
        """
        try:
            kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id)
            if kb_entity is None:
                # No exception is active here, so log with error() rather than exception().
                logging.error("[KnowledgeBaseService] 知识库不存在")
                raise Exception("Knowledge base not exist")
            action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action(
                user_sub, kb_entity.team_id, action)
            return action_entity is not None
        except Exception:
            err = "验证用户在知识库中的操作权限失败"
            logging.exception("[KnowledgeBaseService] %s", err)
            raise

    @staticmethod
    async def list_kb_by_user_sub(user_sub: str, kb_name: str = None) -> ListAllKnowledgeBaseMsg:
        """List knowledge bases grouped by the teams the user created or joined.

        Every team is present in the result, with an empty list when it has
        no knowledge base matching ``kb_name``.
        """
        try:
            # Teams the user created, followed by teams the user joined. A new
            # list is built so the managers' return values are not mutated.
            created_teams = await TeamManager.list_team_mycreated_user_sub(user_sub)
            joined_teams = await TeamManager.list_team_myjoined_by_user_sub(user_sub)
            team_entities = [*created_teams, *joined_teams]
            team_ids = [team_entity.id for team_entity in team_entities]

            knowledge_base_entities = await KnowledgeBaseManager.list_knowledge_base_by_team_ids(team_ids, kb_name)
            team_knowledge_bases_dict = {}
            for knowledge_base_entity in knowledge_base_entities:
                team_knowledge_bases_dict.setdefault(
                    knowledge_base_entity.team_id, []).append(knowledge_base_entity)

            team_knowledge_bases = []
            for team_entity in team_entities:
                # The converter is a coroutine function (it is awaited in
                # list_kb_by_team_id), so it must be awaited here as well.
                # NOTE(review): list_kb_by_team_id also passes the doc type
                # entities as a second argument; confirm it is optional.
                kb_list = [
                    await Convertor.convert_knowledge_base_entity_to_knowledge_base(knowledge_base_entity)
                    for knowledge_base_entity in team_knowledge_bases_dict.get(team_entity.id, [])
                ]
                team_knowledge_bases.append(TeamKnowledgebase(
                    teamId=team_entity.id,
                    teamName=team_entity.name,
                    kbList=kb_list
                ))
            return ListAllKnowledgeBaseMsg(teamKnowledgebases=team_knowledge_bases)
        except Exception:
            err = "列出知识库失败"
            logging.exception("[KnowledgeBaseService] %s", err)
            raise

    @staticmethod
    async def list_kb_by_team_id(req: ListKnowledgeBaseRequest) -> ListKnowledgeBaseMsg:
        """List the knowledge bases matching ``req``, each with its document types."""
        try:
            total, knowledge_base_entities = await KnowledgeBaseManager.list_knowledge_base(req)
            knowledge_bases = []
            for knowledge_base_entity in knowledge_base_entities:
                doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(knowledge_base_entity.id)
                knowledge_bases.append(
                    await Convertor.convert_knowledge_base_entity_to_knowledge_base(
                        knowledge_base_entity, doc_type_entities)
                )
            return ListKnowledgeBaseMsg(total=total, kbList=knowledge_bases)
        except Exception:
            err = "列出知识库失败"
            logging.exception("[KnowledgeBaseService] %s", err)
            raise
- for knowledge_base_entity in knowledge_base_entity_list: - document_type_entity_list = await DocumentTypeManager.select_by_knowledge_base_id(knowledge_base_entity.id) - knowledge_base_dto_list.append( - KnowledgeConvertor.convert_entity_to_dto( - knowledge_base_entity, - document_type_entity_list + @staticmethod + async def list_doc_types_by_kb_id(kb_id: uuid.UUID) -> ListDocumentTypesResponse: + """列出知识库文档类型""" + try: + # 获取文档类型 + document_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(kb_id) + document_types = [] + for document_type_entity in document_type_entities: + document_types.append( + (await Convertor.convert_document_type_entity_to_document_type_response(document_type_entity)) ) + return ListDocumentTypesResponse(documentTypes=document_types) + except Exception as e: + err = "列出知识库文档类型失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e + + @staticmethod + async def generate_knowledge_base_download_link(task_id: uuid.UUID) -> str: + """生成知识库下载链接""" + try: + # 获取知识库 + download_link = await MinIO.generate_download_link( + bucket_name=EXPORT_KB_PATH_IN_MINIO, + object_name=f"{task_id}.zip", ) - return (knowledge_base_dto_list, total) - except Exception as e: - logging.error("List knowledge base error: {}".format(e)) - raise KnowledgeBaseException("List knowledge base error.") + return download_link + except Exception as e: + err = "生成知识库下载链接失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e + @staticmethod + async def create_kb( + user_sub: str, team_id: uuid.UUID, req: CreateKnowledgeBaseRequest) -> uuid.UUID: + """创建知识库""" + try: + knowledge_base_entity = await Convertor.convert_create_knowledge_base_request_to_knowledge_base_entity( + user_sub, team_id, req) + knowledge_base_entity = await KnowledgeBaseManager.add_knowledge_base(knowledge_base_entity) + if knowledge_base_entity is None: + err = "创建知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e + doc_types = req.doc_types + 
doc_type_entities = [] + for doc_type in doc_types: + doc_type_entity = await Convertor.convert_kb_id_and_requeset_document_type_to_document_type_entity(knowledge_base_entity.id, doc_type) + doc_type_entities.append(doc_type_entity) + await DocumentTypeManager.add_document_types(doc_type_entities) + return knowledge_base_entity.id + except Exception as e: + err = "创建知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e -async def rm_knowledge_base(kb_id: str) -> bool: - try: - task_entity = await TaskManager.select_by_op_id(kb_id) - if task_entity: - await stop_knowledge_base_task(task_entity.id) - knowledge_base_entity = await KnowledgeBaseManager.select_by_id(kb_id) - await TaskManager.update_task_by_op_id(kb_id, {'status': TaskConstant.TASK_STATUS_DELETED}) - # 删除document/knowledge_base之前先查出来得到文件名 - document_entity_list = await DocumentManager.select_by_knowledge_base_id(kb_id) - if len(document_entity_list) > 0: - for document_entity in document_entity_list: - await run_document({'id': document_entity.id, 'run': DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_CANCEL}) - task_entity_list = await TaskManager.select_by_op_id(document_entity.id, 'all') - for task_entity in task_entity_list: - await TaskManager.update_task_by_op_id(task_entity.id, {'status': TaskConstant.TASK_STATUS_DELETED}) - await MinIO.delete_object(OssConstant.MINIO_BUCKET_DOCUMENT, str(document_entity.id)) - if knowledge_base_entity.id: - await MinIO.delete_object(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(knowledge_base_entity.id)) - await KnowledgeBaseManager.delete(kb_id) - return True - except Exception as e: - logging.error(f"Delete knowledge base error: {e}") - raise KnowledgeBaseException("Delete knowledge base error.") + @staticmethod + async def get_kb_entity_from_yaml(user_sub: str, team_id: uuid.UUID, yaml_path: str) -> KnowledgeBaseEntity: + """获取知识库配置并转换为数据库实体""" + try: + with open(yaml_path, "r", encoding="utf-8") as f: + kb_config = yaml.load(f, 
Loader=yaml.SafeLoader) + kb_entity = KnowledgeBaseEntity( + team_id=team_id, + author_id=user_sub, + author_name=user_sub, + name=kb_config.get("name", ""), + tokenizer=kb_config.get("tokenizer", Tokenizer.ZH.value), + description=kb_config.get("description", ""), + embedding_model=kb_config.get("embedding_model", ""), + doc_cnt=0, + doc_size=0, + upload_count_limit=kb_config.get("upload_count_limit", 128), + upload_size_limit=kb_config.get("upload_size_limit", 512), + default_parse_method=kb_config.get("default_parse_method", ParseMethod.GENERAL.value), + default_chunk_size=kb_config.get("default_chunk_size", 1024), + status=kb_config.get("status", KnowledgeBaseStatus.IDLE.value), + ) + return kb_entity + except Exception as e: + err = "获取知识库配置失败" + logging.exception("[KnowledgeBaseService] %s", err) + @staticmethod + async def import_kbs(user_sub: str, team_id: uuid.UUID, kb_packages: list[UploadFile] = File(...)) -> str: + """导入知识库""" + if len(kb_packages) > 5: + err = "导入知识库失败,知识库数量超过5个" + logging.exception("[KnowledgeBaseService] %s", err) + raise Exception(err) + kb_packages_sz = 0 + for kb_package in kb_packages: + kb_packages_sz += kb_package.size + if kb_packages_sz > 5 * 1024 * 1024 * 1024: + err = "导入知识库失败,知识库大小超过5G" + logging.exception("[KnowledgeBaseService] %s", err) + raise Exception(err) + kb_import_task_ids = [] + for kb_package in kb_packages: + tmp_path = os.join(IMPORT_KB_PATH_IN_OS, str(uuid.uuid4())) + zip_file_name = kb_package.filename + zip_file_path = os.path.join(tmp_path, zip_file_name) + try: + async with aiofiles.open(zip_file_path, "wb") as f: + content = await kb_package.read() + await f.write(content) + except Exception as e: + err = "导入知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + continue + if not ZipHandler.check_zip_file(zip_file_path): + err = "导入知识库失败,包含文件数量过多或者解压缩之后体积过大" + logging.exception("[KnowledgeBaseService] %s", err) + continue + if not await ZipHandler.unzip_file(zip_file_path, tmp_path, 
['kb_config.yaml']): + err = "导入知识库失败,解压缩失败" + logging.exception("[KnowledgeBaseService] %s", err) + continue + kb_entity = await KnowledgeBaseService.get_kb_entity_from_yaml( + user_sub, team_id, os.path.join(tmp_path, 'kb_config.yaml')) + if kb_entity is None: + err = "导入知识库失败,获取知识库配置失败" + logging.exception("[KnowledgeBaseService] %s", err) + continue + await MinIO.put_object(IMPORT_KB_PATH_IN_MINIO, str(kb_entity.id), zip_file_path) + try: + task_id = await TaskQueueService.init_task(TaskType.KB_IMPORT.value, kb_entity.id) + if task_id: + kb_import_task_ids.append(task_id) + except Exception as e: + err = "导入知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + return kb_import_task_ids -async def parse_knowledge_yaml_file(user_id: uuid.UUID, unzip_folder_path: str): - knowledge_yaml_path = os.path.join(unzip_folder_path, "knowledge_base.yaml") - if not os.path.exists(knowledge_yaml_path): - return None - # 解析knowledge.yaml - parse_methods = set(ParseMethodEnum.get_all_values()) - with open(knowledge_yaml_path, 'r')as kb_file: - data = yaml.safe_load(kb_file) - # 写入knoweldge_base表 - if 'name' not in data.keys() or await KnowledgeBaseManager.select_by_user_id_and_kb_name( - user_id, data['name']) is not None: - data['name'] = '资产'+'_'+secrets.token_hex(16) - if 'embedding_model' not in data.keys() or data['embedding_model'] not in embedding_model_out_dimensions.keys(): - data['embedding_model'] = list(embedding_model_out_dimensions.keys())[0] - if 'default_chunk_size' not in data.keys() or not isinstance(data['default_chunk_size'], int): - data['default_chunk_size'] = 1024 - parse_mathod = data.get('default_parser_method', ParseMethodEnum.GENERAL) - if parse_mathod not in parse_methods: - parse_mathod = ParseMethodEnum.GENERAL - knowledge_base_entity = KnowledgeBaseEntity( - name=data['name'], - user_id=user_id, - language=data.get('language', 'zh'), - description=data.get('description', ''), - embedding_model=data.get('embedding_model', 
EmbeddingModelEnum.BGE_LARGE_ZH), - document_number=0, - document_size=0, - vector_items_id=uuid.uuid4(), - default_parser_method=parse_mathod, - default_chunk_size=data.get('default_chunk_size', 1024), - status=KnowledgeStatusEnum.EXPROTING - ) - knowledge_base_entity = await KnowledgeBaseManager.insert(knowledge_base_entity) - # 写入document_type表 - await DocumentTypeManager.insert_bulk(knowledge_base_entity.id, data.get('document_type_list', [])) - return knowledge_base_entity + @staticmethod + async def export_kb_by_kb_ids(kb_ids: list[uuid.UUID]) -> str: + """导出知识库""" + kb_export_task_ids = [] + for kb_id in kb_ids: + try: + task_id = await TaskQueueService.init_task(TaskType.KB_EXPORT.value, kb_id) + if task_id: + kb_export_task_ids.append(task_id) + except Exception as e: + err = "导出知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + return kb_export_task_ids + @staticmethod + async def update_doc_types(kb_id: uuid.UUID, doc_types: list[dict]) -> None: + new_doc_type_map = {uuid.UUID(doc_type['doc_type_id']): doc_type['doc_type_name'] for doc_type in doc_types} + new_doc_type_ids = {uuid.UUID(doc_type['doc_type_id']) for doc_type in doc_types} + old_doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(kb_id) + old_doc_type_ids = {doc_type_entity.id for doc_type_entity in old_doc_type_entities} + delete_doc_type_ids = old_doc_type_ids - new_doc_type_ids + add_doc_type_ids = new_doc_type_ids - old_doc_type_ids + update_doc_type_ids = old_doc_type_ids & new_doc_type_ids + await DocumentManager.update_doc_type_by_kb_id(kb_id, delete_doc_type_ids, DEFAULt_DOC_TYPE_ID) + doc_type_entities = [] + for doc_type_id in add_doc_type_ids: + doc_type_entity = DocumentTypeEntity( + id=doc_type_id, + kb_id=kb_id, + name=new_doc_type_map[doc_type_id], + ) + doc_type_entities.append(doc_type_entity) + await DocumentTypeManager.add_document_types(doc_type_entities) + for update_doc_type_id in update_doc_type_ids: + try: + await 
DocumentTypeManager.update_doc_type_by_doc_type_id(update_doc_type_id, new_doc_type_map[update_doc_type_id]) + except Exception as e: + err = "更新文档类型失败" + logging.exception("[KnowledgeBaseService] %s", err) + continue -async def submit_import_knowledge_base_task(user_id: uuid.UUID, zip_file_list: List[UploadFile]) -> List[str]: - target_dir = os.path.join(OssConstant.IMPORT_FILE_SAVE_FOLDER, str(user_id), secrets.token_hex(16)) - if os.path.exists(target_dir): - shutil.rmtree(target_dir) - os.makedirs(target_dir) - for zip_file in zip_file_list: - # 1. 将zip文件写入本地stash目录 - zip_file_name = zip_file.filename - zip_file_path = os.path.join(target_dir, zip_file_name) + @staticmethod + async def update_kb_by_kb_id(kb_id: uuid.UUID, req: UpdateKnowledgeBaseRequest) -> uuid.UUID: + """更新知识库""" try: - async with aiofiles.open(zip_file_path, "wb") as f: - content = await zip_file.read() - await f.write(content) + knowledge_base_dict = await Convertor.convert_update_knowledge_base_request_to_dict(req) + doc_types = knowledge_base_dict.get("doc_types", []) + del knowledge_base_dict["doc_types"] + knowledge_base_entity = await KnowledgeBaseManager.update_knowledge_base_by_kb_id(kb_id, knowledge_base_dict) + if knowledge_base_entity is None: + err = "更新知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e + await KnowledgeBaseService.update_doc_types(kb_id, doc_types) + return knowledge_base_entity.id except Exception as e: - logging.error(f"{zip_file_name}写入失败: {e}") - continue - if not ZipHandler.check_zip_file(zip_file_path): - logging.error(f"{zip_file_name}文件过大或者已损坏") - if os.path.exists(zip_file_path): - os.remove(zip_file_path) - zip_file_save_successfully_list = [] - zip_file_name_list = os.listdir(target_dir) - for zip_file_name in zip_file_name_list: - zip_file_path = os.path.join(target_dir, zip_file_name) - # 2. 
将zip文件上传到minIO - if not await ZipHandler.unzip_file(zip_file_path, target_dir, ['knowledge_base.yaml']): - logging.error(f"{zip_file_name}解压失败") - continue - knowledge_base_entity = await parse_knowledge_yaml_file(user_id, target_dir) - if not await MinIO.put_object(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(knowledge_base_entity.id), zip_file_path): - logging.error(f"{zip_file_name}存入minio失败") - if os.path.exists(zip_file_path): - os.remove(zip_file_path) - continue - if os.path.exists(zip_file_path): - os.remove(zip_file_path) - # 3. 创建task表记录 - zip_file_save_successfully_list.append(zip_file_name) - task_entity = await TaskManager.insert(TaskEntity(user_id=user_id, - op_id=knowledge_base_entity.id, - type=TaskConstant.IMPORT_KNOWLEDGE_BASE, - retry=0, - status=TaskConstant.TASK_STATUS_PENDING)) - # 4. 最后提交redis任务 - TaskRedisHandler.put_task_by_tail(config['REDIS_PENDING_TASK_QUEUE_NAME'], str(task_entity.id)) - if os.path.exists(target_dir): - shutil.rmtree(target_dir) - return zip_file_save_successfully_list - + err = "更新知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + raise e -async def submit_export_knowledge_base_task(user_id, kb_id) -> bool: - try: - result = await KnowledgeBaseManager.update(kb_id, {'status': KnowledgeStatusEnum.EXPROTING}) - if result is None: - return "" - # 写入task记录 - task_entity = await TaskManager.insert(TaskEntity(user_id=user_id, - op_id=kb_id, - type=TaskConstant.EXPORT_KNOWLEDGE_BASE, - retry=0, - status=TaskConstant.TASK_STATUS_PENDING)) - # 提交redis任务队列 - TaskRedisHandler.put_task_by_tail(config['REDIS_PENDING_TASK_QUEUE_NAME'], str(task_entity.id)) - return str(task_entity.id) - except Exception as e: - logging.error("Submit save knowledge base task error: {}".format(e)) - return "" - - -async def generate_knowledge_base_download_link(task_id) -> str: - try: - task_entity = await TaskManager.select_by_id(task_id) - if task_entity.status != TaskConstant.TASK_STATUS_SUCCESS: - return "" - return await 
MinIO.generate_download_link(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE, str(task_id)) - except Exception as e: - logging.error("Export knowledge base zip files error: {}".format(e)) - raise KnowledgeBaseException("Export knowledge base zip files error.") + @staticmethod + async def delete_kb_by_kb_ids(kb_ids: list[uuid.UUID]) -> list[uuid.UUID]: + """删除知识库""" + kb_ids_deleted = [] + for kb_id in kb_ids: + try: + task_entity = await TaskManager.get_current_task_by_op_id(kb_id) + if task_entity is not None: + await TaskQueueService.stop_task(task_entity.id) + await KnowledgeBaseManager.update_knowledge_base_by_kb_id( + kb_id, {"status": KnowledgeBaseStatus.DELETED}) + kb_ids_deleted.append(kb_id) + except Exception as e: + err = "删除知识库失败" + logging.exception("[KnowledgeBaseService] %s", err) + continue + return kb_ids_deleted diff --git a/data_chain/apps/service/router_service.py b/data_chain/apps/service/router_service.py new file mode 100644 index 0000000..8e4ca62 --- /dev/null +++ b/data_chain/apps/service/router_service.py @@ -0,0 +1,12 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from fastapi import FastAPI, Request, Depends +from fastapi.routing import APIRoute + + +def get_route_info(request: Request): + route = request.scope.get("route") + if isinstance(route, APIRoute): + request_method = request.method + route_path = route.path + return request_method+' '+route_path + return '' diff --git a/data_chain/apps/service/session_service.py b/data_chain/apps/service/session_service.py new file mode 100644 index 0000000..8b492ab --- /dev/null +++ b/data_chain/apps/service/session_service.py @@ -0,0 +1,54 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
import uuid
from fastapi import Request, HTTPException, status, Response
from starlette import status
from starlette.exceptions import HTTPException
from starlette.requests import HTTPConnection

from data_chain.apps.base.convertor import Convertor
from data_chain.manager.user_manager import UserManager
from data_chain.manager.session_manager import SessionManager
from data_chain.config.config import config
from data_chain.logger.logger import logger as logging


class UserHTTPException(HTTPException):
    """HTTP error that also carries the application-level return fields.

    Attributes are set from the constructor arguments:
    ``retcode`` (application return code), ``rtmsg`` (message) and ``data``.
    """

    def __init__(self, status_code: int, retcode: int, rtmsg: str, data):
        super().__init__(status_code=status_code)
        self.retcode = retcode
        self.rtmsg = rtmsg
        self.data = data


def _authentication_error() -> UserHTTPException:
    """Build the 401 error raised by every failed authentication check."""
    return UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED,
                             retcode=401, rtmsg="Authentication Error.", data="")


async def verify_user(request: HTTPConnection):
    """Check that the request carries a valid session.

    Verification is skipped when ``config["DEBUG"]`` is truthy. The original
    returned unconditionally on its first line, which disabled the check in
    every configuration; that stray ``return`` is removed.

    Args:
        request: Incoming connection; the session ID is read from the
            ``ECSESSION`` cookie.

    Raises:
        UserHTTPException: 401 when the cookie is missing or the session is
            rejected by ``SessionManager.verify_user``.
    """
    if config["DEBUG"]:
        return
    try:
        session_id = request.cookies["ECSESSION"]
    except KeyError:
        raise _authentication_error() from None
    # NOTE(review): called without await, as in the original - if
    # SessionManager.verify_user is a coroutine function this test never
    # fails; confirm against data_chain/manager/session_manager.py.
    if not SessionManager.verify_user(session_id):
        raise _authentication_error()


async def get_user_sub(request: HTTPConnection) -> str:
    """Resolve the user identifier for the request's session.

    With ``config["DEBUG"]`` truthy the user is ``"admin"``; otherwise it is
    looked up from the ``ECSESSION`` cookie. The resolved user is then passed
    to ``UserManager.add_user``. The original returned ``"admin"``
    unconditionally on its first line; that stray ``return`` is removed.

    Args:
        request: Incoming connection carrying the session cookie.

    Returns:
        The user identifier.

    Raises:
        UserHTTPException: 401 when the cookie is missing or no user is
            associated with the session.
    """
    if config["DEBUG"]:
        user_sub = "admin"
    else:
        try:
            session_id = request.cookies["ECSESSION"]
        except KeyError:
            raise _authentication_error() from None
        # NOTE(review): called without await, as in the original - confirm
        # SessionManager.get_user_sub is synchronous.
        user_sub = SessionManager.get_user_sub(session_id)
        if not user_sub:
            raise _authentication_error()
    await UserManager.add_user((await Convertor.convert_user_sub_to_user_entity(user_sub)))
    return user_sub
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
import asyncio
import uuid
from data_chain.entities.enum import TaskType, TaskStatus
from data_chain.apps.base.task.worker.base_worker import BaseWorker
from data_chain.stores.mongodb.mongodb import MongoDB, Task
from data_chain.manager.task_manager import TaskManager
from data_chain.manager.task_queue_mamanger import TaskQueueManager
from data_chain.logger.logger import logger as logging


class TaskQueueService:
    """Task queue: creates, stops, deletes and dispatches worker tasks.

    All operations that touch a worker and the queue together run under the
    class-level ``lock``.
    """
    lock = asyncio.Lock()

    @staticmethod
    async def init_task_queue():
        """Recover tasks that are recorded as RUNNING.

        Each such task is re-initialised; on success it is put back to
        PENDING in the queue, otherwise it is stopped and removed from the
        queue. A failure on one task is logged as a warning and does not stop
        the loop.
        """
        task_entities = await TaskManager.get_task_by_task_status(TaskStatus.RUNNING.value)
        for task_entity in task_entities:
            try:
                flag = await BaseWorker.reinit(task_entity.id)
                if flag:
                    task = Task(_id=task_entity.id, status=TaskStatus.PENDING.value)
                    await TaskQueueManager.update_task_by_id(task_entity.id, task)
                else:
                    await BaseWorker.stop(task_entity.id)
                    await TaskQueueManager.delete_task_by_id(task_entity.id)
            except Exception as e:
                warning = f"[TaskQueueService] 初始化任务失败 {e}"
                logging.warning(warning)

    @staticmethod
    async def init_task(task_type: str, op_id: uuid.UUID) -> uuid.UUID:
        """Create a task and enqueue it as PENDING.

        Args:
            task_type: Task type value passed to ``BaseWorker.init``.
            op_id: ID of the object the task operates on.

        Returns:
            The value returned by ``BaseWorker.init``; the task is only
            enqueued when that value is truthy.

        Raises:
            Exception: Any worker/queue error is logged and re-raised.
        """
        try:
            async with TaskQueueService.lock:
                task_id = await BaseWorker.init(task_type, op_id)
                if task_id:
                    await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.PENDING.value))
                return task_id
        except Exception as e:
            err = f"[TaskQueueService] 初始化任务失败 {e}"
            logging.exception(err)
            raise

    @staticmethod
    async def stop_task(task_id: uuid.UUID):
        """Stop a task.

        Returns:
            ``task_id`` when ``BaseWorker.stop`` reports success, else ``None``.

        Raises:
            Exception: Any worker error is logged and re-raised.
        """
        try:
            async with TaskQueueService.lock:
                flag = await BaseWorker.stop(task_id)
                if not flag:
                    return None
                return task_id
        except Exception as e:
            err = f"[TaskQueueService] 停止任务失败 {e}"
            logging.exception(err)
            raise

    @staticmethod
    async def delete_task(task_id: uuid.UUID):
        """Stop a task and then delete it.

        Returns:
            The value returned by ``BaseWorker.delete``.

        Raises:
            Exception: Any worker error is logged and re-raised.
        """
        try:
            async with TaskQueueService.lock:
                # Keep the caller's task_id: the original rebound it to the
                # result of stop() and handed that result to delete().
                await BaseWorker.stop(task_id)
                return await BaseWorker.delete(task_id)
        except Exception as e:
            err = f"[TaskQueueService] 删除任务失败 {e}"
            logging.exception(err)
            raise

    @staticmethod
    async def handle_successed_tasks():
        """De-initialise and dequeue up to 1024 SUCCESS tasks, oldest first."""
        handle_successed_task_limit = 1024
        for _ in range(handle_successed_task_limit):
            async with TaskQueueService.lock:
                task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.SUCCESS.value)
                if task is None:
                    break
                await BaseWorker.deinit(task.task_id)
                await TaskQueueManager.delete_task_by_id(task.task_id)

    @staticmethod
    async def handle_failed_tasks():
        """Retry or drop up to 1024 FAILED tasks, oldest first.

        A task that re-initialises successfully goes back to PENDING;
        otherwise it is removed from the queue.
        """
        handle_failed_task_limit = 1024
        for _ in range(handle_failed_task_limit):
            async with TaskQueueService.lock:
                task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.FAILED.value)
                if task is None:
                    break
                # Read the ID once, from the fetched record, so the update does
                # not depend on attributes of the replacement Task object.
                task_id = task.task_id
                flag = await BaseWorker.reinit(task_id)
                if flag:
                    pending_task = Task(_id=task_id, status=TaskStatus.PENDING.value)
                    await TaskQueueManager.update_task_by_id(task_id, pending_task)
                else:
                    await TaskQueueManager.delete_task_by_id(task_id)

    @staticmethod
    async def handle_pending_tasks():
        """Start up to 128 PENDING tasks, oldest first.

        Stops at the first task that ``BaseWorker.run`` refuses to start and
        leaves that task in the queue; started tasks are dequeued.
        """
        handle_pending_task_limit = 128
        for _ in range(handle_pending_task_limit):
            async with TaskQueueService.lock:
                task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.PENDING.value)
                if task is None:
                    break
                flag = await BaseWorker.run(task.task_id)
                if not flag:
                    break
                await TaskQueueManager.delete_task_by_id(task.task_id)

    @staticmethod
    async def handle_tasks():
        """Run one scheduling pass: successful, then failed, then pending tasks."""
        await TaskQueueService.handle_successed_tasks()
        await TaskQueueService.handle_failed_tasks()
        await TaskQueueService.handle_pending_tasks()
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
import uuid
from data_chain.apps.service.task_queue_service import TaskQueueService
from data_chain.logger.logger import logger as logging
from data_chain.entities.request_data import (
    ListTaskRequest
)
from data_chain.entities.response_data import (
    ListTaskMsg)
from data_chain.entities.enum import TaskType, TaskStatus
from data_chain.entities.common import default_roles
from data_chain.stores.database.database import TeamEntity
from data_chain.apps.base.convertor import Convertor
from data_chain.manager.task_manager import TaskManager
from data_chain.manager.task_report_manager import TaskReportManager
from data_chain.manager.role_manager import RoleManager


class TaskService:
    """Task service: permission checks, listing and stopping of tasks."""

    @staticmethod
    async def validate_user_action_to_task(
            user_sub: str, task_id: uuid.UUID, action: str) -> bool:
        """Check whether a user may perform ``action`` on a task.

        The permission is looked up for the team the task belongs to.

        Args:
            user_sub: Identifier of the acting user.
            task_id: ID of the task.
            action: Name of the action being attempted.

        Returns:
            ``True`` when a matching action entry exists, else ``False``.

        Raises:
            Exception: If the task does not exist, or on any manager error
                (logged and re-raised).
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                # Not inside a handler here, so log with error(), not exception().
                logging.error("[TaskService] 任务不存在")
                raise Exception("Task not exist")
            action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action(
                user_sub, task_entity.team_id, action)
            return action_entity is not None
        except Exception:
            err = "验证用户在任务中的操作权限失败"
            logging.exception("[TaskService] %s", err)
            raise

    @staticmethod
    async def list_task(user_sub: str, req: ListTaskRequest) -> ListTaskMsg:
        """List tasks together with their current reports.

        The original took only ``req`` yet read an undefined ``user_sub`` and
        so always raised NameError; ``user_sub`` is now an explicit parameter,
        and the method is a ``@staticmethod`` like its siblings.

        Args:
            user_sub: Identifier of the user whose tasks are listed.
            req: Listing request forwarded to ``TaskManager.list_task``.

        Returns:
            A ``ListTaskMsg`` with the total count and the converted tasks.

        Raises:
            Exception: Any manager/convertor error is logged and re-raised.
        """
        try:
            total, task_entities = await TaskManager.list_task(user_sub, req)
            task_ids = [task_entity.id for task_entity in task_entities]
            task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids(task_ids)
            task_report_dict = {task_report_entity.task_id: task_report_entity
                                for task_report_entity in task_report_entities}
            tasks = []
            for task_entity in task_entities:
                # A task without a report is converted with report=None.
                task_report = task_report_dict.get(task_entity.id, None)
                task = await Convertor.convert_task_entity_to_task(task_entity, task_report)
                tasks.append(task)
            return ListTaskMsg(total=total, tasks=tasks)
        except Exception:
            err = "列出任务失败"
            logging.exception("[TaskService] %s", err)
            raise

    @staticmethod
    async def delete_task_by_task_id(
            task_id: uuid.UUID) -> uuid.UUID:
        """Stop the task with the given ID.

        NOTE(review): despite the name this calls ``TaskQueueService.stop_task``,
        not ``delete_task`` - confirm that stopping is the intended behaviour.

        Args:
            task_id: ID of the task.

        Returns:
            The value returned by ``TaskQueueService.stop_task``.

        Raises:
            Exception: If the task does not exist, or on any error while
                stopping it (logged and re-raised).
        """
        try:
            task_entity = await TaskManager.get_task_by_task_id(task_id)
            if task_entity is None:
                err = "任务不存在"
                logging.error("[TaskService] %s", err)
                raise Exception(err)
            task_id = await TaskQueueService.stop_task(task_id)
            return task_id
        except Exception:
            err = "删除任务失败"
            logging.exception("[TaskService] %s", err)
            raise

    @staticmethod
    async def delete_task_by_type(
            user_sub: str, team_id: uuid.UUID, task_type: TaskType) -> list[uuid.UUID]:
        """Stop every task of one type for a user within a team.

        Args:
            user_sub: Identifier of the user owning the tasks.
            team_id: ID of the team.
            task_type: Type of the tasks to stop.

        Returns:
            IDs of the tasks for which ``TaskQueueService.stop_task`` returned
            a truthy value.

        Raises:
            Exception: Any manager/queue error is logged and re-raised.
        """
        try:
            task_entities = await TaskManager.list_task_by_user_sub_and_team_id_and_type(
                user_sub, team_id, task_type)
            task_ids = []
            for task_entity in task_entities:
                task_id = await TaskQueueService.stop_task(task_entity.id)
                if task_id:
                    task_ids.append(task_id)
            return task_ids
        except Exception:
            err = "删除任务失败"
            logging.exception("[TaskService] %s", err)
            raise
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
import uuid
from data_chain.logger.logger import logger as logging
from data_chain.entities.request_data import ListTeamRequest, CreateTeamRequest
from data_chain.entities.response_data import ListTeamMsg
from data_chain.entities.enum import TeamType, TeamStatus
from data_chain.entities.common import default_roles
from data_chain.stores.database.database import TeamEntity
from data_chain.apps.base.convertor import Convertor
from data_chain.manager.team_manager import TeamManager
from data_chain.manager.role_manager import RoleManager


class TeamService:
    """Team service: permission checks and team lifecycle operations."""

    @staticmethod
    async def validate_user_action_in_team(
            user_sub: str, team_id: uuid.UUID, action: str) -> bool:
        """Check whether a user may perform ``action`` within a team.

        Args:
            user_sub: Identifier of the acting user.
            team_id: ID of the team.
            action: Name of the action being attempted.

        Returns:
            ``True`` when a matching action entry exists, else ``False``.

        Raises:
            Exception: Any manager error is logged and re-raised.
        """
        try:
            action_entity = await RoleManager.get_action_by_team_id_user_sub_and_action(user_sub, team_id, action)
            return action_entity is not None
        except Exception:
            err = "验证用户在团队中的操作权限失败"
            logging.exception("[TeamService] %s", err)
            raise

    @staticmethod
    async def list_teams(user_sub: str, req: ListTeamRequest) -> ListTeamMsg:
        """List teams for a user, filtered by ``req.team_type``.

        ``MYCREATED`` and ``MYJOINED`` select the user's created or joined
        teams; any other value lists public teams.

        Args:
            user_sub: Identifier of the requesting user.
            req: Listing request forwarded to the team manager.

        Returns:
            A ``ListTeamMsg`` with the total count and the converted teams.
        """
        if req.team_type == TeamType.MYCREATED:
            total, team_entities = await TeamManager.list_team_mycreated_user_sub(user_sub, req)
        elif req.team_type == TeamType.MYJOINED:
            total, team_entities = await TeamManager.list_team_myjoined_by_user_sub(user_sub, req)
        else:
            total, team_entities = await TeamManager.list_pulic_team(user_sub, req)
        teams = [
            await Convertor.convert_team_entity_to_team(team_entity)
            for team_entity in team_entities
        ]
        return ListTeamMsg(total=total, teams=teams)

    @staticmethod
    async def create_team(user_sub: str, req: CreateTeamRequest) -> uuid.UUID:
        """Create a team with its default roles and add the creator to it.

        Every entry of ``default_roles`` is stored as a role of the new team
        together with its actions. The creating user is assigned the first of
        these roles only.

        Args:
            user_sub: Identifier of the creating user.
            req: Team creation request.

        Returns:
            The ID of the new team.

        Raises:
            Exception: Any manager/convertor error is logged and re-raised.
        """
        try:
            team_entity = await Convertor.convert_create_team_request_to_team_entity(user_sub, req)
            team_entity = await TeamManager.add_team(team_entity)
            team_user_entity = await Convertor.convert_user_sub_and_team_id_to_team_user_entity(
                user_sub, team_entity.id)
            await TeamManager.add_team_user(team_user_entity)
            become_creator_flag = False
            for role_dict in default_roles:
                role_entity = await Convertor.convert_default_role_dict_to_role_entity(team_entity.id, role_dict)
                role_entity = await RoleManager.add_role(role_entity)
                if not become_creator_flag:
                    # Only the first default role is bound to the creating user.
                    user_role_entity = await Convertor.convert_user_sub_role_id_and_team_id_to_user_role_entity(
                        user_sub, role_entity.id, team_entity.id)
                    await RoleManager.add_user_role(user_role_entity)
                    become_creator_flag = True
                role_action_entities = await Convertor.convert_default_role_action_dicts_to_role_action_entities(
                    role_entity.id, role_dict['actions'])
                await RoleManager.add_role_actions(role_action_entities)
            return team_entity.id
        except Exception:
            err = "创建团队失败"
            logging.exception("[TeamService] %s", err)
            raise

    @staticmethod
    async def update_team_by_team_id(
            user_sub: str, team_id: uuid.UUID, req: CreateTeamRequest) -> uuid.UUID:
        """Update a team.

        Args:
            user_sub: Identifier of the acting user (not used by this method).
            team_id: ID of the team to update.
            req: Request whose fields are written to the team.

        Returns:
            The ID of the updated team.

        Raises:
            Exception: If the manager returns ``None`` for the update, or on
                any manager/convertor error (logged and re-raised).
        """
        try:
            team_dict = await Convertor.convert_update_team_request_to_dict(req)
            team_entity = await TeamManager.update_team_by_id(team_id, team_dict)
            if team_entity is None:
                # The original raised a bare str, which is itself a TypeError.
                raise Exception("更新团队失败, 团队不存在")
            return team_entity.id
        except Exception:
            err = "更新团队失败"
            logging.exception("[TeamService] %s", err)
            raise

    @staticmethod
    async def soft_delete_team_by_team_id(
            team_id: uuid.UUID) -> uuid.UUID:
        """Mark a team as deleted by setting its status to ``DELETED``.

        Args:
            team_id: ID of the team.

        Returns:
            The ID of the team that was marked.

        Raises:
            Exception: If the manager returns ``None`` for the update, or on
                any manager error (logged and re-raised).
        """
        try:
            team_entity = await TeamManager.update_team_by_id(
                team_id, {"status": TeamStatus.DELETED.value})
            if team_entity is None:
                # The original raised a bare str, which is itself a TypeError.
                raise Exception("软删除团队失败, 团队不存在")
            return team_entity.id
        except Exception:
            err = "软删除团队失败"
            logging.exception("[TeamService] %s", err)
            raise
UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Authentication Error.", data="") - user_id = SessionManager.get_user_id(session_id) - if not user_id: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Authentication Error.", data="") - return user_id - - -async def verify_passwd(request: HTTPConnection): - # 检查请求是否为GET方法 - if request.method == 'GET': - # 从查询字符串中提取参数 - account = request.query_params.get('account') - current_passwd = request.query_params.get('password') - if not account or not current_passwd: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Login failed.", data="") - user_info_entity = await UserManager.get_user_info_by_account(account) - if user_info_entity is None: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Login failed.", data="") - - if current_passwd != user_info_entity.passwd: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Login failed.", data="") - - -def verify_csrf_token(request: Request, response: Response): - if not config["ENABLE_CSRF"]: - return - try: - csrf_token = request.headers.get('x-csrf-token').strip("\"") - session = request.cookies.get('WD_ECSESSION') - except: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="Authentication Error.", data="") - if not SessionManager.verify_csrf_token(session, csrf_token): - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - retcode=401, rtmsg="CSRF token is invalid.", data="") - - new_csrf_token = SessionManager.create_csrf_token(session) - if not new_csrf_token: - raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, retcode=401, - rtmsg="Renew CSRF token failed.", data="") - - response.set_cookie("wd_csrf_tk", new_csrf_token, max_age=config["SESSION_TTL"] * 60, - secure=True, domain=config["DOMAIN"], samesite="strict") - 
return response diff --git a/data_chain/common/.env.example b/data_chain/common/.env.example index 1e0836f..35868e9 100644 --- a/data_chain/common/.env.example +++ b/data_chain/common/.env.example @@ -1,57 +1,43 @@ +# debug +DEBUG = # FastAPI -UVICORN_IP= -UVICORN_PORT= -SSL_CERTFILE= -SSL_KEYFILE= -SSL_ENABLE= +UVICORN_IP = +UVICORN_PORT = +SSL_CERTFILE = +SSL_KEYFILE = +SSL_ENABLE = # LOG METHOD -LOG_METHOD= +LOG_METHOD = # Database -DATABASE_TYPE= -DATABASE_URL= +DATABASE_URL = # MinIO -MINIO_ENDPOINT= -MINIO_ACCESS_KEY= -MINIO_SECRET_KEY= -MINIO_SECURE= -# Redis -REDIS_HOST= -REDIS_PORT= -REDIS_PWD= -REDIS_PENDING_TASK_QUEUE_NAME= -REDIS_SUCCESS_TASK_QUEUE_NAME= -REDIS_RESTART_TASK_QUEUE_NAME= -REDIS_SILENT_ERROR_TASK_QUEUE_NAME= +MINIO_ENDPOINT = +MINIO_ACCESS_KEY = +MINIO_SECRET_KEY = +MINIO_SECURE = +# MongoDB +MONGODB_USER = +MONGODB_PASSWORD = +MONGODB_HOST = +MONGODB_PORT = +MONGODB_DATABASE = # Task -TASK_RETRY_TIME= +TASK_RETRY_TIME = # Embedding -EMBEDDING_TYPE= -EMBEDDING_API_KEY= -EMBEDDING_ENDPOINT= -EMBEDDING_MODEL_NAME= +EMBEDDING_TYPE = +EMBEDDING_API_KEY = +EMBEDDING_ENDPOINT = +EMBEDDING_MODEL_NAME = # Token -SESSION_TTL= -CSRF_KEY= +SESSION_TTL = +CSRF_KEY = # Security -HALF_KEY1= -HALF_KEY2= -HALF_KEY3= +HALF_KEY1 = +HALF_KEY2 = +HALF_KEY3 = # Prompt file -PROMPT_PATH= +PROMPT_PATH = # Stop Words PATH -STOP_WORDS_PATH= -# LLM config -MODEL_1_MODEL_ID= -MODEL_1_MODEL_NAME= -MODEL_1_MODEL_TYPE= -MODEL_1_OPENAI_API_BASE= -MODEL_1_OPENAI_API_KEY= -MODEL_1_MAX_TOKENS= -MODEL_ENH= -# DEFAULT USER -DEFAULT_USER_ACCOUNT= -DEFAULT_USER_PASSWD= -DEFAULT_USER_NAME= -DEFAULT_USER_LANGUAGE= +STOP_WORDS_PATH = # DOCUMENT PARSER -DOCUMENT_PARSE_USE_CPU_LIMIT= \ No newline at end of file +DOCUMENT_PARSE_USE_CPU_LIMIT = \ No newline at end of file diff --git a/data_chain/common/pp.py b/data_chain/common/pp.py index 00e8ab7..bfcfb50 100644 --- a/data_chain/common/pp.py +++ b/data_chain/common/pp.py @@ -1,5 +1,11 @@ +from data_chain.parser.tools.token_tool 
import TokenTool +import json +import asyncio +from data_chain.config.config import config +from data_chain.llm.llm import LLM import yaml + def load_yaml_file(file_path): try: with open(file_path, 'r', encoding='utf-8') as file: @@ -10,11 +16,15 @@ def load_yaml_file(file_path): print(f"文件 {file_path} 未找到。") except yaml.YAMLError as e: print(f"解析YAML文件时出错: {e}") -def save_yaml_file(yaml_data,file_path): - with open('prompt.yaml', 'w', encoding='utf-8') as file: + + +def save_yaml_file(yaml_data, file_path): + with open(file_path, 'w', encoding='utf-8') as file: yaml.dump(yaml_data, file, allow_unicode=True, default_flow_style=False) + + # 示例:加载YAML文件 -file_path = 'prompt.yaml' +file_path = './data_chain/common/prompt.yaml' yaml_data = load_yaml_file(file_path) if yaml_data: print(yaml_data) @@ -22,4 +32,43 @@ if yaml_data: # yaml_data['INTENT_DETECT_PROMPT_TEMPLATE']='' # yaml_data['OCR_ENHANCED_PROMPT']='' # yaml_data['DETERMINE_ANSWER_AND_QUESTION']='' -save_yaml_file(yaml_data,file_path) \ No newline at end of file +# save_yaml_file(yaml_data,file_path) +# llm = LLM( +# model_name=config['MODEL_NAME'], +# openai_api_base=config['OPENAI_API_BASE'], +# openai_api_key=config['OPENAI_API_KEY'], +# request_timeout=config['REQUEST_TIMEOUT'], +# max_tokens=config['MAX_TOKENS'], +# temperature=config['TEMPERATURE'], +# ) +# prompt_template = yaml_data['CONTENT_TO_ABSTRACT_PROMPT'] +# content = '''在那遥远的山谷之中,有一片神秘而又美丽的森林。阳光透过茂密的枝叶,洒下一片片金色的光斑,仿佛是大自然精心编织的梦幻画卷。森林里,鸟儿欢快地歌唱,那清脆的歌声在林间回荡,传递着生机与活力。松鼠们在树枝间跳跃,敏捷的身影如同灵动的音符,谱写着森林的乐章。 +# 沿着蜿蜒的小径前行,脚下的落叶发出沙沙的声响,仿佛在诉说着岁月的故事。路边的野花竞相开放,红的、黄的、紫的,五彩斑斓,散发着阵阵芬芳。蝴蝶在花丛中翩翩起舞,它们那绚丽的翅膀,如同绚丽的丝绸,在微风中轻轻摇曳。 +# 不远处,一条清澈的小溪潺潺流淌。溪水从山间缓缓流下,清澈见底,能看到鱼儿在水中自由自在地游弋。溪水撞击着石头,发出叮叮咚咚的声音,宛如一首美妙的乐曲。溪边的石头上,长满了青苔,仿佛是大自然赋予的绿色绒毯。 +# 在森林的深处,隐藏着一座古老的城堡。城堡的墙壁上爬满了藤蔓,仿佛是岁月留下的痕迹。城堡的大门紧闭,似乎隐藏着无数的秘密。传说中,这座城堡里住着一位美丽的公主,她被邪恶的巫师困在了这里,等待着勇敢的骑士前来解救。 +# 有一天,一位年轻的骑士听闻了这个传说,决定踏上寻找公主的冒险之旅。他骑着一匹矫健的白马,手持长剑,穿过茂密的森林,越过湍急的河流,历经千辛万苦,终于来到了城堡的门前。 +# 
骑士用力地敲打着城堡的大门,然而,大门却纹丝不动。就在他感到绝望的时候,一只小精灵出现在他的面前。小精灵告诉他,要打开城堡的大门,必须找到三把神奇的钥匙。这三把钥匙分别隐藏在森林的三个不同的地方,只有集齐了这三把钥匙,才能打开城堡的大门。 +# 骑士听了小精灵的话,毫不犹豫地踏上了寻找钥匙的旅程。他在森林里四处寻找,遇到了各种各样的困难和挑战。有时候,他会迷失在森林的深处,找不到方向;有时候,他会遇到凶猛的野兽,不得不与之搏斗。但是,骑士始终没有放弃,他坚信自己一定能够找到钥匙,救出公主。 +# 终于,经过一番艰苦的努力,骑士找到了三把神奇的钥匙。他拿着钥匙,来到城堡的门前,将钥匙插入锁孔。随着一阵清脆的响声,城堡的大门缓缓打开。骑士走进城堡,沿着昏暗的走廊前行,终于在一间房间里找到了公主。 +# 公主看到骑士,眼中闪烁着希望的光芒。她告诉骑士,自己被巫师困在这里已经很久了,一直在等待着有人来救她。骑士将公主带出城堡,骑着白马,离开了这片神秘的森林。 +# 从此以后,骑士和公主过上了幸福的生活。他们的故事在这片土地上流传开来,成为了人们心中的一段佳话。 +# 在这个世界上,还有许多未知的领域等待着我们去探索。也许,在那遥远的地方,还有更多神秘的故事等待着我们去发现。无论是茂密的森林,还是古老的城堡,都充满了无限的魅力。它们吸引着我们不断地前行,去追寻那未知的美好。 +# 当夜幕降临,天空中繁星闪烁。那璀璨的星光,仿佛是大自然赋予我们的最美的礼物。在这宁静的夜晚,我们可以静静地聆听大自然的声音,感受它的神奇与美妙。 +# 有时候,我们会在生活中遇到各种各样的困难和挫折。但是,只要我们像那位勇敢的骑士一样,坚持不懈,勇往直前,就一定能够克服困难,实现自己的梦想。生活就像一场冒险,充满了未知和挑战。我们要勇敢地面对生活中的一切,用自己的智慧和勇气去创造美好的未来。 +# 在这个充满变化的世界里,我们要学会珍惜身边的一切。无论是亲人、朋友,还是那美丽的大自然,都是我们生活中不可或缺的一部分。我们要用心去感受他们的存在,用爱去呵护他们。 +# 随着时间的推移,那片神秘的森林依然静静地矗立在那里。它见证了无数的故事,承载了无数的回忆。而那座古老的城堡,也依然默默地守护着那些神秘的传说。它们就像历史的见证者,诉说着过去的辉煌与沧桑。 +# 我们生活在一个充满希望和梦想的时代。每一个人都有自己的追求和目标,都在为了实现自己的梦想而努力奋斗。无论是科学家、艺术家,还是普通的劳动者,都在各自的岗位上发光发热,为社会的发展做出自己的贡献。 +# 在科技飞速发展的今天,我们的生活发生了翻天覆地的变化。互联网的普及,让我们的信息传播更加迅速和便捷。我们可以通过网络了解到世界各地的新闻和文化,与远方的朋友进行交流和沟通。科技的进步,也让我们的生活更加舒适和便利。我们有了更加先进的交通工具、更加便捷的通讯设备,以及更加高效的生活方式。 +# 然而,科技的发展也带来了一些问题。比如,环境污染、能源危机等。这些问题不仅影响着我们的生活质量,也威胁着我们的未来。因此,我们在享受科技带来的便利的同时,也要关注环境保护和可持续发展。我们要努力寻找更加绿色、环保的生活方式,减少对自然资源的消耗和对环境的破坏。 +# 除了科技的发展,文化的传承和创新也是我们生活中重要的一部分。每一个国家和民族都有自己独特的文化传统,这些文化传统是我们的精神财富,也是我们民族的灵魂。我们要传承和弘扬自己的文化传统,让它们在新的时代焕发出新的活力。同时,我们也要积极吸收和借鉴其他国家和民族的优秀文化成果,促进文化的交流和融合。 +# 在教育方面,我们要注重培养学生的创新精神和实践能力。我们要让学生在学习知识的同时,学会思考、学会创新、学会实践。只有这样,我们才能培养出适应时代发展需要的高素质人才。 +# 在人际交往中,我们要学会尊重他人、理解他人、关心他人。我们要建立良好的人际关系,与他人和谐相处。只有这样,我们才能在生活中感受到温暖和快乐。 +# 总之,我们的生活是丰富多彩的,充满了无限的可能。我们要珍惜生活中的每一个瞬间,用积极的态度去面对生活中的一切。无论是成功还是失败,无论是欢笑还是泪水,都是我们生活中的宝贵财富。让我们一起努力,创造一个更加美好的未来!''' +# abstract = '' +# for i in range(10): +# part = TokenTool.get_k_tokens_words_from_content(content, 100) +# content = content[len(part):] +# sys_call = prompt_template.format(content=part, abstract=abstract) +# 
user_call = '请详细输出内容的摘要,不要输出其他内容' +# abstract = asyncio.run(llm.nostream([], sys_call, user_call)) +# print(abstract) diff --git a/data_chain/common/prompt.yaml b/data_chain/common/prompt.yaml index c9b33b6..c1895ba 100644 --- a/data_chain/common/prompt.yaml +++ b/data_chain/common/prompt.yaml @@ -1,23 +1,3 @@ -DETERMINE_ANSWER_AND_QUESTION: '你是一个问题关联性判断专家,能够准确判断用户当前提出的问题与给出的文本块的相关性,并输出相关程度: - - 注意 - - 1. 不要输出额外内容 - - 2. 如果文本块相关且上下文完整,输出"6" - - 3. 如果文本块相关但上下文都缺失,输出"5" - - 4. 如果文本块相关,但缺少后文,输出"4" - - 5. 如果文本块相关,但缺少前文,输出"3" - - 6. 如果文本块问题有轻微相关性,输出"2" - - 7. 如果文本块完全不相关,输出"1" - - - 下面是用户当前问题: {question} 下面是文本块: {chunk} ' INTENT_DETECT_PROMPT_TEMPLATE: "\n\n \n \ \ 根据历史对话,推断用户的实际意图并补全用户的提问内容。\n 用户的提问内容将在中给出,历史对话将在中给出。\n\ \ 要求:\n 1. 参考下面给出的样例,请直接输出补全后的提问内容;输出不要包含XML标签,不要包含任何解释说明;\n\ @@ -56,3 +36,296 @@ OCR_ENHANCED_PROMPT: '你是一个图片ocr内容总结专家,你的任务是 当前图片上一部分的ocr内容总结:{pre_part_description} 当前图片部分ocr的结果:{part}' +QA_TO_STATEMENTS_PROMPT: '你是一个文本分解专家,你的任务是根据我给出的问题和答案,将答案提取为多个陈诉,陈诉使用列表形式返回 + + 注意: + #01 陈诉必须来源于答案中的重点内容 + #02 陈诉按相对顺序排列 + #03 输出的单个陈诉长度不超过50个字 + #04 输出的陈诉总数不超过20个 + #05 请仅输出陈诉列表,不要输出其他内容 + 例子: + + 输入: + 问题:openEuler是什么操作系统? 
+ 答案:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出: + [ + \"openEuler是一个开源的操作系统\", + \"openEuler旨在为云计算和边缘计算提供支持\", + \"openEuler具有高性能、高安全性和高可靠性等特点\" + ] + + 下面是给出的问题和答案: + 问题:{question} + 答案:{answer} +' +ANSWER_TO_ANSWER_PROMPT: '你是一个文本分析专家,你的任务是对比两个文本之间的相似度,并输出一个0-100之间的分数且保留两位小数: +注意: +#01 请根据文本在语义、语序和关键字上的相似度进行打分 +#02 请仅输出分数,不要输出其他内容 +例子: +输入1: + 文本1:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 文本2:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出1:100.00 +输入2: + 文本1:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 文本2:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能和高安全性等特点。 + 输出2:90.00 +输入3: + 文本1:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 文本2:白马非马 + 输出3:00.00 +下面是给出的文本: + 文本1:{text_1} + 文本2:{text_2} +' +CONTENT_TO_STATEMENTS_PROMPT: '你是一个文本分解专家,你的任务是根据我给出的文本,将文本提取为多个陈诉,陈诉使用列表形式返回 + + 注意: + #01 陈诉必须来源于文本中的重点内容 + #02 陈诉按相对顺序排列 + #03 输出的单个陈诉长度不少于20个字,不超过50个字 + #04 输出的陈诉总数不超过3个 + #05 请仅输出陈诉列表,不要输出其他内容 + 例子: + + 输入:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出: + [ + \"openEuler是一个开源的操作系统\", + \"openEuler旨在为云计算和边缘计算提供支持\", + \"openEuler具有高性能、高安全性和高可靠性等特点\" + ] + + 下面是给出的文本: + {content} + ' +STATEMENTS_TO_FRAGMENT_PROMPT: '你是一个文本专家,你的任务是判断给出的陈诉是否与片段强相关 + 注意: + #01 如果陈诉与片段强相关或者来自于片段,请输出YES + #02 如果陈诉中的内容与片段无关,请输出NO + #03 请仅输出YES或NO,不要输出其他内容 + 例子: + 输入1: + + 陈诉:openEuler是一个开源的操作系统。 + 片段:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出1:YES + + 输入2: + 陈诉:白马非马 + 片段:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出2:NO + + 下面是给出的陈诉和片段: + 陈诉:{statement} + 片段:{fragment} + ' +STATEMENTS_TO_QUESTION_PROMPT: '你是一个文本分析专家,你的任务是根据给出的陈诉和问题判断,陈诉是否与问题相关 + 注意: + #01 如果陈诉与问题相关,请输出YES + #02 如果陈诉与问题不相关,请输出NO + #03 请仅输出YES或NO,不要输出其他内容 + 例子: + 输入1: + 陈诉:openEuler是一个开源的操作系统。 + 问题:openEuler是什么操作系统? + 输出1:YES + + 输入2: + 陈诉:白马非马 + 问题:openEuler是什么操作系统? 
+ 输出2:NO + + 下面是给出的陈诉和问题: + 陈诉:{statement} + 问题:{question} + ' +GENREATE_QUESTION_FROM_CONTENT_PROMPT: '你是一个文本分析专家,你的任务是根据给出的文本生成{k}个问题并用列表返回 + 注意: + #01 问题必须来源于文本中的内容 + #02 单个问题长度不超过50个字 + #03 不要输出重复的问题 + #04 请仅输出问题列表,不要输出其他内容 + 例子: + 输入:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出: + [\"openEuler是什么操作系统?\",\"openEuler旨在为哪个领域提供支持?\",\"openEuler具有哪些特点?\",\"openEuler的安全性如何?\",\"openEuler的可靠性如何?\"] + 下面是给出的文本: + {content} +' +GENERATE_ANSWER_FROM_QUESTION_AND_CONTENT_PROMPT: '你是一个文本分析专家,你的任务是根据给出的问题和文本 + 生成答案 + 注意: + #01 答案必须来源于文本中的内容 + #02 答案长度不少于50字且不超过500个字 + #03 请仅输出答案,不要输出其他内容 + 例子: + 输入1: + 问题:openEuler是什么操作系统? + 文本:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出1:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。 + + 输入2: + 问题:openEuler的安全性如何? + 文本:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出2:openEuler具有高安全性。 + + 下面是给出的问题和文本: + 问题:{question} + 文本:{content} +' +CAL_QA_SCORE_PROMPT: '你是一个文本分析专家,你的任务是给出的问题 答案 片段 判断由片段生成的问题和答案的分数,输出一个0-100之间的数,保留两位小数 +请根据下面规则评估: +问题评估 +相关性:问题是否与给定片段的主题紧密相关,是否准确地基于片段内容提出,有无偏离或曲解片段的核心信息。 +合理性:问题的表述是否清晰、逻辑连贯,是否符合正常的语言表达和思维习惯,不存在语义模糊、歧义或自相矛盾的情况。 +多样性:如果有多个问题,问题之间的角度和类型是否具有一定的多样性,避免过于单一或重复,能否从不同方面挖掘片段的内容。 +难度:问题的难度是否适中,既不过于简单,使答案可以直接从片段中照搬,也不过于困难,让回答者难以从片段中找到线索或依据。 +答案评估 +准确性:答案是否准确无误地回答了问题,与片段中的信息是否一致,有无错误或遗漏关键要点。 +完整性:答案是否完整,涵盖了问题所涉及的各个方面,对于需要详细阐述的问题,是否提供了足够的细节和解释。 +简洁性:在保证回答完整准确的前提下,答案是否简洁明了,避免冗长、啰嗦的表述,能否以简洁的语言传达关键信息。 +连贯性:答案的逻辑是否清晰,各部分内容之间的衔接是否自然流畅,有无跳跃或混乱的情况。 +整体评估 +一致性:问题和答案之间是否相互匹配,答案是否是针对所提出的问题进行的回答,两者在内容和逻辑上是否保持一致。 +融合性:答案是否能够很好地融合片段中的信息,不仅仅是简单的摘抄,而是经过整合和提炼,以合理的方式呈现出来。 +创新性:在某些情况下,评估答案是否具有一定的创新性或独特见解,是否能够在片段信息的基础上进行适当的拓展或深入思考。 + +注意: +#01 请仅输出分数,不要输出其他内容 + +例子: +输入1: + 问题:openEuler是什么操作系统? 
+ 答案:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 片段:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出1:100.00 + +下面是给出的问题、答案和片段: + 问题:{question} + 答案:{answer} + 片段:{fragment} +' +CONTENT_TO_ABSTRACT_PROMPT: '你是一个文本摘要专家,你的任务是根据给出的文本和摘要生成一个新的摘要 + 注意: + #01 请结合文本和摘要中最重要的内容生成新的摘要 + #02 新的摘要的长度必须大于200字小于500字 + #03 请仅输出新的摘要,不要输出其他内容 + 例子: + 输入1: + 文本:openEuler具有高性能、高安全性和高可靠性等特点。 + 摘要:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。 + 输出1:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。openEuler具有高性能、高安全性和高可靠性等特点。 + + 下面是给出的文本和摘要: + 文本:{content} + 摘要:{abstract} +' + +CONTENT_TO_TITLE_PROMPT: '你是一个标题提取专家,你的任务是根据给出的文本生成一个标题 + 注意: + #01 标题必须来源于文本中的内容 + #02 标题长度不超过20个字 + #03 请仅输出标题,不要输出其他内容 + 例子: + 输入:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 + 输出:openEuler操作系统概述 + 下面是给出的文本: + {content} +' + +ACC_RESULT_ANALYSIS_PROMPT: '你是一个文本分析专家,你的任务是根据给出的 测试使用的大模型 embedding模型 测试相关文档的解析方法和分块大小 单条测试结果分析rag算法匹配到的片段分析当前知识库问答准确率提升的方法 +测试结果包含下面内容: +问题:测试使用的问题 +标准答案:测试使用的标准答案 +生成的答案:测试结果中大模型的答案 +原始片段:测试结果中原始片段 +检索的片段:测试结果中rag算法检索到的片段 +精确度:评估生成的答案与问题之间的语义相似程度,这个评分越低说明使用的大模型遵从度越低,其次是rag检索到的片段缺少上下文,不足以支撑问题的回答 +召回率:评估生成的答案与标准回答之间的语义相似程度,这个评分越低说明使用的大模型遵从度越低 +忠实值:评估生成的答案中的内容是否来自于检索到的片段,这个评分越低说明rag检索算法和embedding模型的召回率越低,检索到的片段不足以回答问题,其次是文本分块大小不合理 +可解释性:评估生成的答案是否用于回答问题,这个评分越低说明rag检索算法和embedding模型的召回率越低,检索到的片段不足以回答问题,其次是使用的大模型遵从度越低 + +注意: +#01 请根据测试结果中的内容分析当前知识库问答准确率提升的方法 +#02 请结合召回率、精确度、忠实值和可解释性四个指标进行分析 +#03 分析结果长度不超过500字 +#04 请仅输出分析结果,不要输出其他内容 +例子: +输入: +模型名称:qwen2.5-32b +embedding模型:bge-m3 +文本的分块大小:512 +使用解析的rag算法:向量化检索 +问题:openEuler是什么操作系统? 
+标准答案:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 +生成的答案:openEuler是一个开源的操作系统,旨在为云计算和边缘计算提供支持。它具有高性能、高安全性和高可靠性等特点。 +原始片段:openEuler 是由开放原子开源基金会孵化及运营的开源操作系统,以构建面向数字基础设施的开源操作系统生态为使命,致力于为云计算、边缘计算等前沿领域提供坚实的底层支持。在云计算场景中,openEuler 能够充分优化资源调度与分配机制,通过轻量化的内核设计和高效的虚拟化技术,显著提升云服务的响应速度与吞吐量;在边缘计算领域,它凭借出色的低资源消耗特性与实时处理能力,保障了边缘节点在复杂环境下数据处理的及时性与准确性。 +openEuler 具备一系列卓越特性:在性能方面,其自主研发的智能调度算法能够动态适配不同负载场景,结合对硬件资源的深度优化利用,大幅提升系统运行效率;安全性上,通过内置的多层次安全防护体系,包括强制访问控制、漏洞扫描与修复机制,为系统数据与应用程序构筑起坚实的安全防线;可靠性层面,基于分布式存储、故障自动检测与快速恢复技术,确保系统在面对网络波动、硬件故障等突发状况时,依然能够稳定运行,最大限度降低服务中断风险。这些特性使 openEuler 成为推动数字经济高质量发展的重要技术基石,助力企业与开发者在数字化转型进程中抢占先机。 +检索的片段:openEuler 作为开源操作系统领域的先锋力量,深度融合了社区开发者的智慧结晶,不断迭代升级以适应快速变化的技术环境。在微服务架构盛行的当下,openEuler 通过对容器化技术的深度优化,支持 Kubernetes 等主流编排工具,让应用部署与管理变得更加便捷高效,极大提升了企业的业务部署灵活性。同时,它积极拥抱 AI 时代,通过对机器学习框架的适配与优化,为 AI 模型训练和推理提供强大的算力支撑,有效降低了 AI 应用的开发与运行成本。 +在生态建设方面,openEuler 拥有庞大且活跃的开源社区,汇聚了来自全球的技术爱好者与行业专家,形成了从内核开发、驱动适配到应用优化的完整生态链。社区定期举办技术交流与开发者大会,推动知识共享与技术创新,为开发者提供了丰富的学习资源与实践机会。众多硬件厂商和软件企业纷纷加入 openEuler 生态,推出基于该系统的解决方案和产品,涵盖金融、电信、能源等关键行业,以实际应用场景验证并反哺 openEuler 的技术发展,形成了良性循环的创新生态,让 openEuler 不仅是一个操作系统,更成为推动产业协同发展的强大引擎 。 + +召回率:95.00 +精确度:99.00 +忠实值:90.00 +可解释性:85.00 + +输出: +根据测试结果中的内容,当前知识库问答准确率提升的方法可以从以下几个方面进行分析: +召回率:目前召回率为 95.00,有提升空间。优化向量化检索算法,进一步挖掘原始片段中与问题相关但未被检索到的信息,如 openEuler 生态中一些具体实践案例等。调整 embedding 模型 bge-m3,使其能更全面准确地捕捉语义,扩大检索范围,提高召回率,使生成答案更接近标准答案。 +精确度:精确度达 99.00,已较高。但可进一步优化,对检索到的片段进行更深入的语义分析,结合大模型 qwen2.5-32b 的特点,精准匹配问题语义,避免细微语义偏差,例如更精确阐述 openEuler 在云计算和边缘计算中高性能等特性的具体表现。 +忠实值:忠实值为 90.00,说明部分答案内容未完全源于检索片段。优化 rag 检索算法,提高 embedding 模型召回率,调整文本分块大小为 512 可能存在不合理,需根据内容重新评估,确保检索片段包含足够上下文以支撑答案,使生成答案内容均来自检索片段,如关于 openEuler 生态建设中相关技术细节应从检索片段获取。 +可解释性:可解释性为 85.00,相对较低。提升大模型 qwen2.5-32b 的遵从度,优化 rag 检索算法和 embedding 模型 bge-m3 的召回率,使检索片段能更好支撑生成答案,保证答案能清晰回答问题,例如在回答 openEuler 相关问题时,使答案逻辑更清晰、针对性更强,提高整体可解释性。 + + +下面是测试结果中的内容: +使用的大模型:{model_name} +embedding模型:{embedding_model} +文本的分块大小:{chunk_size} +使用解析的rag算法:{rag_algorithm} +问题:{question} +标准答案:{standard_answer} +生成的答案:{generated_answer} 
+原始片段:{original_fragment} +检索的片段:{retrieved_fragment} +召回率:{recall} +精确度:{precision} +忠实值:{faithfulness} +可解释性:{relevance} +' +ACC_ANALYSIS_RESULT_MERGE_PROMPT: '你是一个文本分析专家,你的任务融合两条分析结果输出一份新的分析结果 +注意: +#01 请根据两条分析结果中的内容融合出一条新的分析结果 +#02 请结合召回率、精确度、忠实值和可解释性四个指标进行分析 +#03 新的分析结果长度不超过500字 +#04 请仅输出新的分析结果,不要输出其他内容 +例子: +输入1: +分析结果1: + +召回率:目前召回率为 95.00,有提升空间。优化向量化检索算法,进一步挖掘原始片段中与问题相关但未被检索到的信息,如 openEuler 生态中一些具体实践案例等。调整 embedding 模型 bge-m3,使其能更全面准确地捕捉语义,扩大检索范围,提高召回率,使生成答案更接近标准答案。 +精确度:精确度达 99.00,已较高。但可进一步优化,对检索到的片段进行更深入的语义分析,结合大模型 qwen2.5-32b 的特点,精准匹配问题语义,避免细微语义偏差,例如更精确阐述 openEuler 在云计算和边缘计算中高性能等特性的具体表现。 +忠实值:忠实值为 90.00,说明部分答案内容未完全源于检索片段。优化 rag 检索算法,提高 embedding 模型召回率,调整文本分块大小为 512 可能存在不合理,需根据内容重新评估,确保检索片段包含足够上下文以支撑答案,使生成答案内容均来自检索片段,如关于 openEuler 生态建设中相关技术细节应从检索片段获取。 +可解释性:可解释性为 85.00,相对较低。提升大模型 qwen2.5-32b 的遵从度,优化 rag 检索算法和 embedding 模型 bge-m3 的召回率,使检索片段能更好支撑生成答案,保证答案能清晰回答问题,例如在回答 openEuler 相关问题时,使答案逻辑更清晰、针对性更强,提高整体可解释性。 + +分析结果2: + +从召回率来看,目前为 95.00,可进一步优化 rag 检索算法和 embedding 模型,以提高生成答案与标准回答之间的语义相似程度,接近或达到更高的召回率,例如可以持续优化算法来更好地匹配相关片段。 +从精确度来看,为 99.00,接近满分,说明生成的答案与问题语义相似程度较高,但仍可进一步提升,可通过完善 embedding 模型来更好地理解问题语义,优化检索到的片段的上下文完整性,减少因上下文不足导致的精确度波动。 +对于忠实值,目前为 90.00,说明生成的答案中部分内容未完全来自检索到的片段。可优化 rag 检索算法,提高其召回率,同时合理调整文本分块大小,确保检索到的片段能充分回答问题,从而提高忠实值。 +关于可解释性,当前为 85.00,说明生成的答案在用于回答问题方面有一定提升空间。一方面可以优化使用的大模型,提高其遵从度,使其生成的答案更准确地回答问题;另一方面,继续优化 rag 检索算法和 embedding 模型的召回率,保证检索到的片段能全面支撑问题的回答,提高可解释性。 + +输出: +召回率:目前召回率为 95.00,有提升空间。优化向量化检索算法,进一步挖掘原始片段中与问题相关但未被检索到的信息,如 openEuler 生态中一些具体实践案例等。调整 embedding 模型 bge-m3,使其能更全面准确地捕捉语义,扩大检索范围,提高召回率,使生成答案更接近标准答案。 +精确度:精确度达 99.00,已较高。但可进一步优化,对检索到的片段进行更深入的语义分析,结合大模型 qwen2.5-32b 的特点,精准匹配问题语义,避免细微语义偏差,例如更精确阐述 openEuler 在云计算和边缘计算中高性能等特性的具体表现。 +忠实值:忠实值为 90.00,说明部分答案内容未完全源于检索片段。优化 rag 检索算法,提高 embedding 模型召回率,调整文本分块大小为 512 可能存在不合理,需根据内容重新评估,确保检索片段包含足够上下文以支撑答案,使生成答案内容均来自检索片段,如关于 openEuler 生态建设中相关技术细节应从检索片段获取。 +可解释性:可解释性为 85.00,相对较低。提升大模型 qwen2.5-32b 的遵从度,优化 rag 检索算法和 embedding 模型 bge-m3 的召回率,使检索片段能更好支撑生成答案,保证答案能清晰回答问题,例如在回答 openEuler 
相关问题时,使答案逻辑更清晰、针对性更强,提高整体可解释性。 + +下面两条分析结果: +分析结果1:{analysis_result_1} +分析结果2:{analysis_result_2} +' + diff --git a/data_chain/config/config.py b/data_chain/config/config.py index 439951b..a413651 100644 --- a/data_chain/config/config.py +++ b/data_chain/config/config.py @@ -1,4 +1,4 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. import os import uuid from dotenv import dotenv_values @@ -13,16 +13,9 @@ class DictBaseModel(BaseModel): return None -class ModelConfig(DictBaseModel): - MODEL_ID: str = Field(default_factory=lambda: uuid.uuid4(), description="模型ID") - MODEL_NAME: str = Field(..., description="使用的语言模型名称或版本") - MODEL_TYPE: str = Field(..., description="语言模型类型") - OPENAI_API_BASE: str = Field(..., description="语言模型服务的基础URL") - OPENAI_API_KEY: str = Field(..., description="语言模型访问密钥") - MAX_TOKENS: int = Field(..., description="单次请求中允许的最大Token数") - - class ConfigModel(DictBaseModel): + # debug + DEBUG: bool = Field(default=False, description="是否启用调试模式") # FastAPI UVICORN_IP: str = Field(None, description="FastAPI 服务的IP地址") UVICORN_PORT: int = Field(None, description="FastAPI 服务的端口号") @@ -32,26 +25,29 @@ class ConfigModel(DictBaseModel): # LOG METHOD LOG_METHOD: str = Field('stdout', description="日志记录方式") # Database - DATABASE_TYPE: str = Field('postgres', description="数据库类型") DATABASE_URL: str = Field(None, description="Database数据库链接url") # MinIO MINIO_ENDPOINT: str = Field(None, description="MinIO连接地址") MINIO_ACCESS_KEY: str = Field(None, description="Minio认证ak") MINIO_SECRET_KEY: str = Field(None, description="MinIO认证sk") MINIO_SECURE: bool = Field(None, description="MinIO安全连接") - # Redis - REDIS_HOST: str = Field(None, description="redis地址") - REDIS_PORT: int = Field(None, description="redis端口") - REDIS_PWD: str = Field(None, description="redis密码") - REDIS_PENDING_TASK_QUEUE_NAME: str = Field(default='rag_pending_task_queue', 
description="redis等待开始任务队列名称") - REDIS_SUCCESS_TASK_QUEUE_NAME: str = Field(default='rag_success_task_queue', description="redis已经完成任务队列名称") - REDIS_RESTART_TASK_QUEUE_NAME: str = Field(default='rag_restart_task_queue', description="redis等待重启任务队列名称") - REDIS_SILENT_ERROR_TASK_QUEUE_NAME: str = Field( - default='rag_silent_error_task_queue', description="redis等待重启任务队列名称") + # MongoDB + MONGODB_USER: str = Field(None, description="mongodb认证用户名") + MONGODB_PASSWORD: str = Field(None, description="mongodb认证密码") + MONGODB_HOST: str = Field(None, description="mongodb地址") + MONGODB_PORT: int = Field(None, description="mongodb端口") + MONGODB_DATABASE: str = Field(None, description="mongodb数据库名称") # Task TASK_RETRY_TIME: int = Field(None, description="任务重试次数") + # LLM + MODEL_NAME: str = Field(None, description="模型名称") + OPENAI_API_BASE: str = Field(None, description="openai api base") + OPENAI_API_KEY: str = Field(None, description="openai api key") + REQUEST_TIMEOUT: int = Field(default=60, description="请求超时时间") + MAX_TOKENS: int = Field(None, description="最大token数") + TEMPERATURE: float = Field(default=0.7, description="温度系数") # Embedding - EMBEDDING_TYPE: str = Field("openai", description="embedding 服务的类型") + EMBEDDING_TYPE: str = Field(default="openai", description="embedding 服务的类型") EMBEDDING_API_KEY: str = Field(None, description="embedding服务api key") EMBEDDING_ENDPOINT: str = Field(None, description="embedding服务url地址") EMBEDDING_MODEL_NAME: str = Field(None, description="embedding模型名称") @@ -66,16 +62,10 @@ class ConfigModel(DictBaseModel): PROMPT_PATH: str = Field(None, description="prompt路径") # Stop Words PATH STOP_WORDS_PATH: str = Field(None, description="停用词表存放位置") - # LLM config - MODELS: List[ModelConfig] = Field(..., description="多个大模型的配置列表") - MODEL_ENH: bool = Field(None, description="是否使用大模型能力增强") - # DEFAULT USER - DEFAULT_USER_ACCOUNT: str = Field(default='admin', description="默认用户账号") - DEFAULT_USER_PASSWD: str = 
Field(default='8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92', description="默认用户密码") - DEFAULT_USER_NAME: str = Field(default='admin', description="默认用户名称") - DEFAULT_USER_LANGUAGE: str = Field(default='zh', description="默认用户语言") # DOCUMENT PARSER DOCUMENT_PARSE_USE_CPU_LIMIT: int = Field(default=4, description="文档解析器使用CPU核数") + # Task Retry Time limit + TASK_RETRY_TIME_LIMIT: int = Field(default=3, description="任务重试次数限制") class Config: @@ -86,20 +76,7 @@ class Config: config_file = os.getenv("CONFIG") else: config_file = "data_chain/common/.env" - env_vars = dotenv_values(config_file) - - models_configs = [] - model_keys = set([k.split('_')[1] for k in env_vars.keys() if k.startswith('MODEL_')]) # 提取模型标识符 - - for model_key in model_keys: - single_model_config = { - k.replace(f'MODEL_{model_key}_', ''): v for k, v in env_vars.items() - if k.startswith(f'MODEL_{model_key}')} - models_configs.append(single_model_config) - self.config = ConfigModel( - MODELS=[ModelConfig(**model_cfg) for model_cfg in models_configs], - **{k: v for k, v in env_vars.items() if not k.startswith('MODEL_')} - ) + self.config = ConfigModel(**(dotenv_values(config_file))) if os.getenv("PROD"): os.remove(config_file) @@ -109,4 +86,4 @@ class Config: return None -config = Config() \ No newline at end of file +config = Config() diff --git a/data_chain/apps/service/embedding_service.py b/data_chain/embedding/embedding.py similarity index 68% rename from data_chain/apps/service/embedding_service.py rename to data_chain/embedding/embedding.py index 7822381..66512e1 100644 --- a/data_chain/apps/service/embedding_service.py +++ b/data_chain/embedding/embedding.py @@ -8,37 +8,39 @@ from data_chain.logger.logger import logger as logging urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -class Vectorize(): +class Embedding(): @staticmethod async def vectorize_embedding(text): - if config['EMBEDDING_TYPE']=='openai': + if config['EMBEDDING_TYPE'] == 'openai': headers 
= { - "Authorization": f"Bearer {config['EMBEDDING_API_KEY']}" - } + "Authorization": f"Bearer {config['EMBEDDING_API_KEY']}" + } data = { "input": text, "model": config["EMBEDDING_MODEL_NAME"], "encoding_format": "float" } try: - res = requests.post(url=config["EMBEDDING_ENDPOINT"],headers=headers, json=data, verify=False) + res = requests.post(url=config["EMBEDDING_ENDPOINT"], headers=headers, json=data, verify=False) if res.status_code != 200: return None return res.json()['data'][0]['embedding'] except Exception as e: - logging.error(f"Embedding error failed due to: {e}") + err = f"[Embedding] 向量化失败 ,error: {e}" + logging.exception(err) return None - elif config['EMBEDDING_TYPE'] =='mindie': + elif config['EMBEDDING_TYPE'] == 'mindie': try: data = { - "inputs": text, + "inputs": text, } res = requests.post(url=config["EMBEDDING_ENDPOINT"], json=data, verify=False) if res.status_code != 200: return None return json.loads(res.text)[0] except Exception as e: - logging.error(f"Embedding error failed due to: {e}") - return None + err = f"[Embedding] 向量化失败 ,error: {e}" + logging.exception(err) + return None else: return None diff --git a/data_chain/entities/common.py b/data_chain/entities/common.py new file mode 100644 index 0000000..b826b46 --- /dev/null +++ b/data_chain/entities/common.py @@ -0,0 +1,350 @@ +import uuid +DEFAULt_DOC_TYPE_ID = uuid.UUID("00000000-0000-0000-0000-000000000000") +DEFAULt_DOC_TYPE_NAME = "default" +actions = [ + {'type': 'team', + 'name': '获取团队用户列表', 'action': 'POST /team/usr'}, + {'type': 'team', + 'name': '获取团队消息列表', 'action': 'POST /team/msg'}, + {'type': 'team', + 'name': '发送团队邀请', 'action': 'POST /team/invitation'}, + {'type': 'team', + 'name': '处理用户申请', 'action': 'PUT /usr_msg'}, + {'type': 'team', + 'name': '更新团队信息', 'action': 'PUT /team'}, + {'type': 'team', + 'name': '更新团队用户角色', 'action': 'PUT /team/usr'}, + {'type': 'team', + 'name': '移交团队', 'action': 'PUT /team/author'}, + {'type': 'team', + 'name': '解散团队', 'action': 'DELETE 
/team'}, + {'type': 'team', + 'name': '剔除团队用户', 'action': 'DELETE /team/usr'}, + {'type': 'knowledge_base', + 'name': '获取团队下知识库列表', 'action': 'POST /kb/team'}, + {'type': 'knowledge_base', + 'name': '获取知识库文档类型', 'action': 'GET /kb/doc_type'}, + {'type': 'knowledge_base', + 'name': '下载知识库文件', 'action': 'GET /kb/download'}, + {'type': 'knowledge_base', + 'name': '创建知识库', 'action': 'POST /kb'}, + {'type': 'knowledge_base', + 'name': '导入知识库', 'action': 'POST /kb/import'}, + {'type': 'knowledge_base', + 'name': '导出知识库', 'action': 'POST /kb/export'}, + {'type': 'knowledge_base', + 'name': '更新知识库信息', 'action': 'PUT /kb'}, + {'type': 'knowledge_base', + 'name': '删除知识库', 'action': 'DELETE /kb'}, + {'type': 'chunk', + 'name': '获取文档解析结果列表', 'action': 'POST /chunk/list'}, + {'type': 'chunk', + 'name': '更新文档解析结果', 'action': 'PUT /chunk'}, + {'type': 'document', + 'name': '获取文档列表', 'action': 'POST /doc/list'}, + {'type': 'document', + 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '创建文档', 'action': 'POST /doc'}, + {'type': 'document', + 'name': '解析文档', 'action': 'POST /doc/parse'}, + {'type': 'document', + 'name': '更新文档信息', 'action': 'PUT /doc'}, + {'type': 'document', + 'name': '删除文档', 'action': 'DELETE /doc'}, + {'type': 'dataset_data', + 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, + {'type': 'dataset_data', + 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'dataset_data', + 'name': '下载数据集', 'action': 'GET /dataset/download'}, + {'type': 'dataset_data', + 'name': '新建数据集', 'action': 'POST /dataset'}, + {'type': 'dataset_data', + 'name': '导入数据集', 'action': 'POST /dataset/import'}, + {'type': 'dataset_data', + 'name': '导出数据集', 'action': 'POST /dataset/export'}, + {'type': 'dataset_data', + 'name': '生成数据集', 'action': 'POST /dataset/generate'}, + {'type': 'dataset_data', + 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'testing', + 'name': 
'获取测试列表', 'action': 'POST /testing/list'}, + {'type': 'testing', + 'name': '获取测试用例列表', 'action': 'POST /testing/testcase'}, + {'type': 'testing', + 'name': '下载测试结果', 'action': 'GET /testing/download'}, + {'type': 'testing', + 'name': '创建测试', 'action': 'POST /testing'}, + {'type': 'testing', + 'name': '运行测试', 'action': 'POST /testing/run'}, + {'type': 'testing', + 'name': '更新测试信息', 'action': 'PUT /testing'}, + {'type': 'testing', + 'name': '删除测试', 'action': 'DELETE /testing'}, + {'type': 'role', + 'name': '获取角色操作列表', 'action': 'GET /role/action'}, + {'type': 'role', + 'name': '获取角色列表', 'action': 'POST /role/list'}, + {'type': 'role', + 'name': '创建角色', 'action': 'POST /role'}, + {'type': 'role', + 'name': '更新角色信息', 'action': 'PUT /role'}, + {'type': 'role', + 'name': '删除角色', 'action': 'DELETE /role'}, + {'type': 'task', + 'name': '获取任务列表', 'action': 'POST /task'}, + {'type': 'task', + 'name': '获取任务报告', 'action': 'GET /task/report'}, + {'type': 'task', + 'name': '删除任务', 'action': 'DELETE /task'}] +default_roles = [ + { + "id": uuid.UUID("00000000-0000-0000-0000-000000000001"), + "name": "创建者", + "is_unique": True, + "actions": [ + {'type': 'team', + 'name': '获取团队用户列表', 'action': 'POST /team/usr'}, + {'type': 'team', + 'name': '获取团队消息列表', 'action': 'POST /team/msg'}, + {'type': 'team', + 'name': '发送团队邀请', 'action': 'POST /team/invitation'}, + {'type': 'team', + 'name': '处理用户申请', 'action': 'PUT /usr_msg'}, + {'type': 'team', + 'name': '更新团队信息', 'action': 'PUT /team'}, + {'type': 'team', + 'name': '更新团队用户角色', 'action': 'PUT /team/usr'}, + {'type': 'team', + 'name': '移交团队', 'action': 'PUT /team/author'}, + {'type': 'team', + 'name': '解散团队', 'action': 'DELETE /team'}, + {'type': 'team', + 'name': '剔除团队用户', 'action': 'DELETE /team/usr'}, + {'type': 'knowledge_base', + 'name': '获取团队下知识库列表', 'action': 'POST /kb/team'}, + {'type': 'knowledge_base', + 'name': '获取知识库文档类型', 'action': 'GET /kb/doc_type'}, + {'type': 'knowledge_base', + 'name': '下载知识库文件', 'action': 'GET 
/kb/download'}, + {'type': 'knowledge_base', + 'name': '创建知识库', 'action': 'POST /kb'}, + {'type': 'knowledge_base', + 'name': '导入知识库', 'action': 'POST /kb/import'}, + {'type': 'knowledge_base', + 'name': '导出知识库', 'action': 'POST /kb/export'}, + {'type': 'knowledge_base', + 'name': '更新知识库信息', 'action': 'PUT /kb'}, + {'type': 'knowledge_base', + 'name': '删除知识库', 'action': 'DELETE /kb'}, + {'type': 'chunk', + 'name': '获取文档解析结果列表', 'action': 'POST /chunk/list'}, + {'type': 'chunk', + 'name': '更新文档解析结果', 'action': 'PUT /chunk'}, + {'type': 'document', + 'name': '获取文档列表', 'action': 'POST /doc/list'}, + {'type': 'document', + 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '创建文档', 'action': 'POST /doc'}, + {'type': 'document', + 'name': '解析文档', 'action': 'POST /doc/parse'}, + {'type': 'document', + 'name': '更新文档信息', 'action': 'PUT /doc'}, + {'type': 'document', + 'name': '删除文档', 'action': 'DELETE /doc'}, + {'type': 'dataset_data', + 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, + {'type': 'dataset_data', + 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'dataset_data', + 'name': '下载数据集', 'action': 'GET /dataset/download'}, + {'type': 'dataset_data', + 'name': '新建数据集', 'action': 'POST /dataset'}, + {'type': 'dataset_data', + 'name': '导入数据集', 'action': 'POST /dataset/import'}, + {'type': 'dataset_data', + 'name': '导出数据集', 'action': 'POST /dataset/export'}, + {'type': 'dataset_data', + 'name': '生成数据集', 'action': 'POST /dataset/generate'}, + {'type': 'dataset_data', + 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'testing', + 'name': '获取测试列表', 'action': 'POST /testing/list'}, + {'type': 'testing', + 'name': '获取测试用例列表', 'action': 'POST /testing/testcase'}, + {'type': 'testing', + 'name': '下载测试结果', 'action': 'GET /testing/download'}, + {'type': 'testing', + 'name': '创建测试', 'action': 'POST /testing'}, + {'type': 'testing', + 'name': 
'运行测试', 'action': 'POST /testing/run'}, + {'type': 'testing', + 'name': '更新测试信息', 'action': 'PUT /testing'}, + {'type': 'testing', + 'name': '删除测试', 'action': 'DELETE /testing'}, + {'type': 'role', + 'name': '获取角色操作列表', 'action': 'GET /role/action'}, + {'type': 'role', + 'name': '获取角色列表', 'action': 'POST /role/list'}, + {'type': 'role', + 'name': '创建角色', 'action': 'POST /role'}, + {'type': 'role', + 'name': '更新角色信息', 'action': 'PUT /role'}, + {'type': 'role', + 'name': '删除角色', 'action': 'DELETE /role'}, + {'type': 'task', + 'name': '获取任务列表', 'action': 'POST /task'}, + {'type': 'task', + 'name': '获取任务报告', 'action': 'GET /task/report'}, + {'type': 'task', + 'name': '删除任务', 'action': 'DELETE /task'}], + "editable": False, + }, { + "id": uuid.UUID("00000000-0000-0000-0000-000000000002"), + "name": "管理员", + "is_unique": False, + "actions": [ + {'type': 'team', + 'name': '获取团队用户列表', 'action': 'POST /team/usr'}, + {'type': 'team', + 'name': '获取团队消息列表', 'action': 'POST /team/msg'}, + {'type': 'team', + 'name': '发送团队邀请', 'action': 'POST /team/invitation'}, + {'type': 'team', + 'name': '处理用户申请', 'action': 'PUT /usr_msg'}, + {'type': 'team', + 'name': '更新团队用户角色', 'action': 'PUT /team/usr'}, + {'type': 'team', + 'name': '剔除团队用户', 'action': 'DELETE /team/usr'}, + {'type': 'knowledge_base', + 'name': '获取团队下知识库列表', 'action': 'POST /kb/team'}, + {'type': 'knowledge_base', + 'name': '获取知识库文档类型', 'action': 'GET /kb/doc_type'}, + {'type': 'knowledge_base', + 'name': '下载知识库文件', 'action': 'GET /kb/download'}, + {'type': 'knowledge_base', + 'name': '创建知识库', 'action': 'POST /kb'}, + {'type': 'knowledge_base', + 'name': '导入知识库', 'action': 'POST /kb/import'}, + {'type': 'knowledge_base', + 'name': '导出知识库', 'action': 'POST /kb/export'}, + {'type': 'knowledge_base', + 'name': '更新知识库信息', 'action': 'PUT /kb'}, + {'type': 'knowledge_base', + 'name': '删除知识库', 'action': 'DELETE /kb'}, + {'type': 'chunk', + 'name': '获取文档解析结果列表', 'action': 'POST /chunk/list'}, + {'type': 'chunk', + 'name': 
'更新文档解析结果', 'action': 'PUT /chunk'}, + {'type': 'document', + 'name': '获取文档列表', 'action': 'POST /doc/list'}, + {'type': 'document', + 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '创建文档', 'action': 'POST /doc'}, + {'type': 'document', + 'name': '解析文档', 'action': 'POST /doc/parse'}, + {'type': 'document', + 'name': '更新文档信息', 'action': 'PUT /doc'}, + {'type': 'document', + 'name': '删除文档', 'action': 'DELETE /doc'}, + {'type': 'dataset_data', + 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, + {'type': 'dataset_data', + 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'dataset_data', + 'name': '下载数据集', 'action': 'GET /dataset/download'}, + {'type': 'dataset_data', + 'name': '新建数据集', 'action': 'POST /dataset'}, + {'type': 'dataset_data', + 'name': '导入数据集', 'action': 'POST /dataset/import'}, + {'type': 'dataset_data', + 'name': '导出数据集', 'action': 'POST /dataset/export'}, + {'type': 'dataset_data', + 'name': '生成数据集', 'action': 'POST /dataset/generate'}, + {'type': 'dataset_data', + 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'testing', + 'name': '获取测试列表', 'action': 'POST /testing/list'}, + {'type': 'testing', + 'name': '获取测试用例列表', 'action': 'POST /testing/testcase'}, + {'type': 'testing', + 'name': '下载测试结果', 'action': 'GET /testing/download'}, + {'type': 'testing', + 'name': '创建测试', 'action': 'POST /testing'}, + {'type': 'testing', + 'name': '运行测试', 'action': 'POST /testing/run'}, + {'type': 'testing', + 'name': '更新测试信息', 'action': 'PUT /testing'}, + {'type': 'testing', + 'name': '删除测试', 'action': 'DELETE /testing'}, + {'type': 'role', + 'name': '获取角色操作列表', 'action': 'GET /role/action'}, + {'type': 'role', + 'name': '获取角色列表', 'action': 'POST /role/list'}, + {'type': 'task', + 'name': '获取任务列表', 'action': 'POST /task'}, + {'type': 'task', + 'name': '获取任务报告', 'action': 'GET /task/report'}, + {'type': 'task', + 'name': '删除任务', 
'action': 'DELETE /task'}], + "editable": False, + }, { + "id": uuid.UUID("00000000-0000-0000-0000-000000000003"), + "name": "成员", + "is_unique": False, + "actions": + [ + {'type': 'team', + 'name': '获取团队用户列表', 'action': 'POST /team/usr'}, + {'type': 'team', + 'name': '获取团队消息列表', 'action': 'POST /team/msg'}, + {'type': 'knowledge_base', + 'name': '获取团队下知识库列表', 'action': 'POST /kb/team'}, + {'type': 'knowledge_base', + 'name': '获取知识库文档类型', 'action': 'GET /kb/doc_type'}, + {'type': 'chunk', + 'name': '获取文档解析结果列表', 'action': 'POST /chunk/list'}, + {'type': 'document', + 'name': '获取文档列表', 'action': 'POST /doc/list'}, + {'type': 'document', + 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'dataset_data', + 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, + {'type': 'dataset_data', + 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'testing', + 'name': '获取测试列表', 'action': 'POST /testing/list'}, + {'type': 'testing', + 'name': '获取测试用例列表', 'action': 'POST /testing/testcase'}, + {'type': 'role', + 'name': '获取角色操作列表', 'action': 'GET /role/action'}, + {'type': 'role', + 'name': '获取角色列表', 'action': 'POST /role/list'} + ], + "editable": False, + } +] + +DOC_PATH_IN_MINIO = "witchaind-doc" +IMAGE_PATH_IN_MINIO = "witchaind-image" +EXPORT_KB_PATH_IN_MINIO = "witchaind-kb-export" +IMPORT_KB_PATH_IN_MINIO = "witchaind-kb-import" +EXPORT_DATASET_PATH_IN_MINIO = "witchaind-dataset-export" +IMPORT_DATASET_PATH_IN_MINIO = "witchaind-dataset-import" +TESTING_REPORT_PATH_IN_MINIO = "witchaind-testing-report" + +DOC_PATH_IN_OS = "./witchaind_doc" +EXPORT_KB_PATH_IN_OS = "./witchaind-kb-export" +IMPORT_KB_PATH_IN_OS = "./witchaind-kb-import" +EXPORT_DATASET_PATH_IN_OS = "./witchaind-dataset-export" +IMPORT_DATASET_PATH_IN_OS = "./witchaind-dataset-import" +TESTING_REPORT_PATH_IN_OS = "./witchaind-testing-report" diff --git a/data_chain/entities/enum.py b/data_chain/entities/enum.py new file mode 100644 index 0000000..838f1a6 --- /dev/null +++ 
# (patch metadata kept from the diff) b/data_chain/entities/enum.py @@ -0,0 +1,200 @@
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
"""
Enumeration types shared by the data_chain entities.

Every enum mixes in ``str`` so that members compare equal to (and serialize
as) their plain string value.

Several identifiers in the original code are misspelled (``EHANCED``,
``GERNERAL``, ``CANCLED``, ``DocParseRelutTopology``). They are kept as the
canonical names so that ``.name``, iteration order and existing callers are
unaffected; the correctly spelled names are provided as aliases.

Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
"""

from enum import Enum


class TeamType(str, Enum):
    """Team listing scope: created by me, joined by me, or all."""
    MYCREATED = "mycreated"
    MYJOINED = "myjoined"
    ALL = "all"


class TeamStatus(str, Enum):
    """Team status."""
    EXISTED = "existed"
    DELETED = "deleted"


class Tokenizer(str, Enum):
    """Tokenizer."""

    # NOTE(review): this value is Chinese text while the siblings are short
    # codes ("en", "mix"); "zh" may have been intended. Left unchanged because
    # the value is part of the stored/wire format - confirm before changing.
    ZH = "中文"
    EN = "en"
    MIX = "mix"


class Embedding(str, Enum):
    """Embedding model."""
    BGEM3 = "bgem3"


class ParseMethod(str, Enum):
    """Document parse method."""
    GENERAL = "general"
    OCR = "ocr"
    EHANCED = "enhanced"
    # Correctly spelled alias of EHANCED (same value, so same member).
    ENHANCED = "enhanced"
    QA = "qa"


class UserStatus(str, Enum):
    """User status."""
    ACTIVE = "active"
    INACTIVE = "inactive"
    DELETED = "deleted"


class UserMessageType(str, Enum):
    """User message type."""
    Invitation = "invitation"
    Application = "application"
    # Upper-case aliases, consistent with the naming of every other enum here.
    INVITATION = "invitation"
    APPLICATION = "application"


class UserMessageStatus(str, Enum):
    """User message status."""
    UNREAD = "unread"
    ACCEPTED = "accepted"
    REJECTED = "rejected"


class KnowledgeBaseStatus(str, Enum):
    """Knowledge base status."""
    IDLE = "idle"
    PENDING = "pending"
    EXPORTING = "exporting"
    IMPORTING = "importing"
    DELETED = "deleted"


class DocParseRelutTopology(str, Enum):
    """Topology of a document parse result."""
    LIST = "list"
    TREE = "tree"
    GRAPH = "graph"


# Correctly spelled alias; the misspelled class name is kept for existing imports.
DocParseResultTopology = DocParseRelutTopology


class DocumentStatus(str, Enum):
    """Document status."""
    IDLE = "idle"
    PENDING = "pending"
    RUNNING = "running"
    DELETED = "deleted"


class ImageStatus(str, Enum):
    """Image status."""
    EXISTED = "existed"
    DELETED = "deleted"


class ChunkStatus(str, Enum):
    """Chunk status."""
    EXISTED = "existed"
    DELETED = "deleted"


class ChunkType(str, Enum):
    """Chunk content type."""
    TEXT = "text"
    TABLE = "table"
    IMAGE = "image"
    CODE = "code"
    LINK = "link"


class ChunkParseTopology(str, Enum):
    """Position of a chunk within the parse-result topology."""
    GERNERAL = "general"
    # Correctly spelled alias of GERNERAL (same value, so same member).
    GENERAL = "general"
    LISTHEAD = "listhead"
    LISTBODY = "listbody"
    LISTTAIL = "listtail"
    TREEROOT = "treeroot"
    TREENORMAL = "treenormal"
    TREELEAF = "treeleaf"
    GRAPHNODE = "graphnode"


class DataSetStatus(str, Enum):
    """Dataset status."""
    IDLE = "idle"
    PENDING = "pending"
    GENERATING = "generating"
    EXPORTING = "exporting"
    IMPORTING = "importing"
    DELETED = "deleted"


class QAStatus(str, Enum):
    """Question/answer pair status."""
    EXISTED = "existed"
    DELETED = "deleted"


class TestingStatus(str, Enum):
    """Testing status."""
    IDLE = "idle"
    PENDING = "pending"
    RUNNING = "running"
    DELETED = "deleted"


class TestCaseStatus(str, Enum):
    """Test case status."""
    EXISTED = "existed"
    DELETED = "deleted"


class SearchMethod(str, Enum):
    """Search (retrieval) method."""
    KEYWORD = "keyword"
    EMBEDDING = "embedding"
    KEYWORD_EMBEDDING = "keyword_embedding"
    DOC2CHUNK = "doc2chunk"
    DOC2CHUNK_BFS = "doc2chunk_bfs"
    ENHANCED_BY_LLM = "enhanced_by_llm"


class TaskType(str, Enum):
    """Task type."""
    DOC_PARSE = "doc_parse"
    KB_EXPORT = "kb_export"
    KB_IMPORT = "kb_import"
    DATASET_EXPORT = "dataset_export"
    DATASET_IMPORT = "dataset_import"
    DATASET_GENERATE = "dataset_generate"
    TESTING_RUN = "testing_run"


class TaskStatus(str, Enum):
    """Task status."""
    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    FAILED = "failed"
    CANCLED = "canceled"
    # Correctly spelled alias of CANCLED (same value, so same member).
    CANCELED = "canceled"
    DELETED = "deleted"


class OrderType(str, Enum):
    """Sort order."""
    ASC = "asc"
    DESC = "desc"


class ActionType(str, Enum):
    """Category of a role action."""
    TEAM = "team"
    USER = "user"
    ROLE = "role"
    KNOWLEDGE_BASE = "knowledge_base"
    DOCUMENT = "document"
    CHUNK = "chunk"
    DATASET = "dataset"
    TESTING = "testing"
    DATASET_DATA = "dataset_data"
    TASK = "task"


# (patch metadata kept from the diff; the next file's content follows)
# diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py
# new file mode 100644
# index 0000000..b5d8e4c
# --- /dev/null
# +++ b/data_chain/entities/request_data.py
# @@ -0,0 +1,246 @@
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
import re
import uuid
from typing import Dict, Generic, List, Optional, TypeVar
from pydantic import BaseModel, Field, validator, constr

from data_chain.entities.enum import (
    TeamType,
    Tokenizer,
    ParseMethod,
    UserStatus,
    UserMessageType,
    UserMessageStatus,
    KnowledgeBaseStatus,
    DocParseRelutTopology,
    DocumentStatus,
    ChunkType,
    ChunkParseTopology,
    DataSetStatus,
    TestingStatus,
    SearchMethod,
    TaskType,
    TaskStatus,
    OrderType)
from data_chain.entities.common import DEFAULt_DOC_TYPE_ID

# Request body models. Fields that declare an ``alias`` are populated from the
# camelCase alias; ``page`` / ``page_size`` carry pagination with defaults of
# 1 and 40.
#
# Integer ranges are declared with ``ge`` / ``le``. The original code passed
# ``min=`` / ``max=``, which are not pydantic constraints: they were accepted
# as extra keywords and the bounds were never enforced.


class ListTeamRequest(BaseModel):
    """Filter and pagination fields for listing teams."""
    team_type: TeamType = Field(default=TeamType.ALL, description="团队类型", alias="teamType")
    team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId")
    team_name: Optional[str] = Field(default=None, description="团队名称", alias="teamName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class ListTeamMsgRequest(BaseModel):
    """Filter and pagination fields for listing team messages."""
    team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class ListTeamUserRequest(BaseModel):
    """Filter and pagination fields for listing the users of a team (team id required)."""
    team_id: uuid.UUID = Field(description="团队ID", alias="teamId")
    user_sub: Optional[str] = Field(default=None, description="用户ID", alias="userSub")
    user_name: Optional[str] = Field(default=None, description="用户名", alias="userName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class CreateTeamRequest(BaseModel):
    """Fields for creating a team."""
    team_name: str = Field(default='这是一个默认的团队名称', min_length=1, max_length=30, alias="teamName")
    description: str = Field(default='', max_length=150)
    is_public: bool = Field(default=False)


class UpdateTeamRequest(BaseModel):
    """Fields for updating a team (same shape as CreateTeamRequest)."""
    team_name: str = Field(default='这是一个默认的团队名称', min_length=1, max_length=30, alias="teamName")
    description: str = Field(default='', max_length=150)
    is_public: bool = Field(default=False)


class DocumentType(BaseModel):
    """A document type (id and name) as supplied inside knowledge base requests."""
    doc_type_id: uuid.UUID
    doc_type_name: str = Field(default='这是一个默认的文档类型名称', min_length=1, max_length=20)


class ListKnowledgeBaseRequest(BaseModel):
    """Filter and pagination fields for listing knowledge bases."""
    team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId")
    kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId")
    kb_name: Optional[str] = Field(default=None, description="资产名称", alias="kbName")
    author_name: Optional[str] = Field(default=None, description="资产创建者", alias="authorName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class CreateKnowledgeBaseRequest(BaseModel):
    """Fields for creating a knowledge base."""
    kb_name: str = Field(default='这是一个默认的资产名称', min_length=1, max_length=20, alias="kbName")
    description: str = Field(default='', max_length=150)
    tokenizer: Tokenizer = Field(default=Tokenizer.ZH)
    embedding_model: str = Field(default='', description="知识库使用的embedding模型", alias="embeddingModel")
    # Range 128..2048 is enforced (previously declared with ignored min=/max=).
    default_chunk_size: int = Field(
        default=512, description="知识库默认文件分块大小", alias="defaultChunkSize", ge=128, le=2048)
    default_parse_method: ParseMethod = Field(
        default=ParseMethod.GENERAL, description="知识库默认解析方法", alias="defaultParseMethod")
    # Range 128..1024 is enforced.
    upload_count_limit: int = Field(
        default=128, description="知识库上传文件数量限制", alias="uploadCountLimit", ge=128, le=1024)
    # Range 128..2048 is enforced.
    upload_size_limit: int = Field(
        default=512, description="知识库上传文件大小限制", alias="uploadSizeLimit", ge=128, le=2048)
    doc_types: List[DocumentType] = Field(default=[], description="知识库支持的文档类型", alias="docTypes")


class UpdateKnowledgeBaseRequest(BaseModel):
    """Fields for updating a knowledge base (no embedding model; name may be up to 30 chars)."""
    kb_name: str = Field(default='这是一个默认的资产名称', min_length=1, max_length=30, alias="kbName")
    description: str = Field(default='', max_length=150)
    tokenizer: Tokenizer = Field(default=Tokenizer.ZH)
    # Range 128..2048 is enforced (previously declared with ignored min=/max=).
    default_chunk_size: int = Field(
        default=512, description="知识库默认文件分块大小", alias="defaultChunkSize", ge=128, le=2048)
    default_parse_method: ParseMethod = Field(
        default=ParseMethod.GENERAL, description="知识库默认解析方法", alias="defaultParseMethod")
    # Range 128..1024 is enforced.
    upload_count_limit: int = Field(
        default=128, description="知识库上传文件数量限制", alias="uploadCountLimit", ge=128, le=1024)
    # Range 128..2048 is enforced.
    upload_size_limit: int = Field(
        default=512, description="知识库上传文件大小限制", alias="uploadSizeLimit", ge=128, le=2048)
    doc_types: List[DocumentType] = Field(default=[], description="知识库支持的文档类型", alias="docTypes")


class ListDocumentRequest(BaseModel):
    """Filter, ordering and pagination fields for listing documents."""
    kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId")
    # The original also set min_length=1 / max_length=30 here; those are
    # length constraints and do not apply to a UUID value, so they were removed.
    doc_id: Optional[uuid.UUID] = Field(default=None, description="文档id", alias="docId")
    doc_name: Optional[str] = Field(default=None, description="文档名称", alias="docName")
    doc_type_id: Optional[uuid.UUID] = Field(default=None, description="文档类型id", alias="docTypeId")
    parse_status: Optional[list[TaskStatus]] = Field(default=None, description="文档解析状态", alias="parseStatus")
    parse_method: Optional[ParseMethod] = Field(default=None, description="文档解析方法", alias="parseMethod")
    author_name: Optional[str] = Field(default=None, description="文档创建者", alias="authorName")
    created_time_start: Optional[str] = Field(default=None, description="文档创建时间开始", alias="createdTimeStart")
    created_time_end: Optional[str] = Field(default=None, description="文档创建时间结束", alias="createdTimeEnd")
    created_time_order: OrderType = Field(default=OrderType.DESC, description="文档创建时间排序", alias="createdTimeOrder")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class UpdateDocumentRequest(BaseModel):
    """Fields for updating a document."""
    doc_name: str = Field(default='这是一个默认的文档名称', min_length=1, max_length=150, alias="docName")
    doc_type_id: uuid.UUID = Field(default=DEFAULt_DOC_TYPE_ID, description="文档类型的id", alias="docTypeId")
    parse_method: ParseMethod = Field(
        default=ParseMethod.GENERAL, description="知识库默认解析方法", alias="parseMethod")
    # Range 128..2048 is enforced (previously declared with ignored min=/max=).
    chunk_size: int = Field(default=512, description="知识库默认文件分块大小", alias="chunkSize", ge=128, le=2048)
    enabled: bool = Field(default=True, description="文档是否启用")


class UpdateChunkRequest(BaseModel):
    """Fields for updating a chunk; ``text`` is required and must be non-empty."""
    text: str = Field(min_length=1, description="分块文本内容")
    enabled: bool = Field(default=True, description="分块是否启用")


class ListChunkRequest(BaseModel):
    """Filter and pagination fields for listing the chunks of a document (document id required)."""
    doc_id: uuid.UUID = Field(description="文档id", alias="docId")
    text: Optional[str] = Field(default=None, description="分块文本内容", alias="text")
    type: Optional[ChunkType] = Field(default=None, description="分块类型", alias="type")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class ListDatasetRequest(BaseModel):
    """Filter, ordering and pagination fields for listing datasets."""
    kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId")
    dataset_id: Optional[uuid.UUID] = Field(default=None, description="数据集id", alias="datasetId")
    dataset_name: Optional[str] = Field(default=None, description="数据集名称", alias="datasetName")
    # NOTE(review): the alias is "dataCnt" although the field is an ordering
    # (compare "scoreOrder" below); possibly "dataCntOrder" was intended.
    # Left unchanged because the alias is part of the request contract.
    data_cnt_order: Optional[OrderType] = Field(default=OrderType.DESC, description="数据集数据数量", alias="dataCnt")
    llm_id: Optional[str] = Field(default=None, description="数据集使用的大模型id", alias="llmId")
    is_data_cleared: Optional[bool] = Field(default=None, description="数据集是否清洗", alias="isDataCleared")
    is_chunk_related: Optional[bool] = Field(default=None, description="数据集是否上下文关联", alias="isChunkRelated")
    generate_status: Optional[List[TaskStatus]] = Field(default=None, description="数据集生成状态", alias="generateStatus")
    score_order: Optional[OrderType] = Field(default=OrderType.DESC, description="数据集评分的排序方法", alias="scoreOrder")
    author_name: Optional[str] = Field(default=None, description="数据集创建者", alias="authorName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class ListDataInDatasetRequest(BaseModel):
    """Pagination fields for listing the data of one dataset (dataset id required)."""
    dataset_id: uuid.UUID = Field(description="数据集id", alias="datasetId")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class CreateDatasetRequest(BaseModel):
    """Fields for creating a dataset; ``kb_id`` and ``llm_id`` are required."""
    kb_id: uuid.UUID = Field(description="资产id", alias="kbId")
    dataset_name: str = Field(default='这是一个默认的数据集名称', description="测试数据集名称",
                              min_length=1, max_length=30, alias="datasetName")
    description: str = Field(default='', description="测试数据集简介", max_length=200)
    document_ids: List[uuid.UUID] = Field(default=[], description="测试数据集关联的文档", alias="documentIds")
    # Range 1..512 is enforced (previously declared with ignored min=/max=).
    data_cnt: int = Field(default=64, alias="dataCnt", description="测试数据集内的数据数量", ge=1, le=512)
    llm_id: str = Field(description="测试数据集使用的大模型id", alias="llmId")
    is_data_cleared: bool = Field(default=False, description="测试数据集是否进行清洗", alias="isDataCleared")
    is_chunk_related: bool = Field(default=False, description="测试数据集进行上下文关联", alias="isChunkRelated")


class UpdateDatasetRequest(BaseModel):
    """Fields for updating a dataset's name and description."""
    dataset_name: str = Field(default='这是一个默认的数据集名称', description="测试数据集名称",
                              min_length=1, max_length=30, alias="datasetName")
    description: str = Field(default='', description="测试数据集简介", max_length=200)


class UpdateDataRequest(BaseModel):
    """Fields for updating one question/answer entry."""
    question: str = Field(default='这是一个默认的问题', description="问题",
                          min_length=1, max_length=200, alias="question")
    answer: str = Field(default='这是一个默认的答案', description="答案",
                        min_length=1, max_length=1000, alias="answer")


class ListTestingRequest(BaseModel):
    """Filter, ordering and pagination fields for listing testings."""
    kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId")
    testing_id: Optional[uuid.UUID] = Field(default=None, description="测试id", alias="testingId")
    testing_name: Optional[str] = Field(default=None, description="测试名称", alias="testingName")
    llm_id: Optional[str] = Field(default=None, description="测试使用的大模型id", alias="llmId")
    run_status: Optional[List[TaskStatus]] = Field(default=None, description="测试运行状态", alias="runStatus")
    scores_order: Optional[OrderType] = Field(default=OrderType.DESC, description="测试评分", alias="scoresOrder")
    author_name: Optional[str] = Field(default=None, description="测试创建者", alias="authorName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class CreateTestingRequest(BaseModel):
    """Fields for creating a testing; ``dataset_id`` and ``llm_id`` are required."""
    testing_name: str = Field(default='这是一个默认的测试名称', description="测试名称",
                              min_length=1, max_length=30, alias="testingName")
    description: str = Field(default='', description="测试简介", max_length=200)
    dataset_id: uuid.UUID = Field(description="测试数据集id", alias="datasetId")
    llm_id: str = Field(description="测试使用的大模型id", alias="llmId")
    search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_EMBEDDING,
                                        description="测试使用的检索方法", alias="searchMethod")


class UpdateTestingRequest(BaseModel):
    """Fields for updating a testing (name may be up to 150 chars)."""
    testing_name: str = Field(default='这是一个默认的测试名称', description="测试名称",
                              min_length=1, max_length=150, alias="testingName")
    description: str = Field(default='', description="测试简介", max_length=200)
    dataset_id: uuid.UUID = Field(description="测试数据集id", alias="datasetId")
    llm_id: str = Field(description="测试使用的大模型id", alias="llmId")
    search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_EMBEDDING,
                                        description="测试使用的检索方法", alias="searchMethod")


class ListRoleRequest(BaseModel):
    """Filter and pagination fields for listing roles."""
    team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId")
    role_id: Optional[uuid.UUID] = Field(default=None, description="角色id", alias="roleId")
    role_name: Optional[str] = Field(default=None, description="角色名称", alias="roleName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class CreateRoleRequest(BaseModel):
    """Fields for creating a role: its name and the list of actions it holds."""
    role_name: str = Field(default='这是一个默认的角色名称', min_length=1, max_length=30, alias="roleName")
    actions: List[str] = Field(default=[], description="角色拥有的操作的列表", alias="actions")


class UpdateRoleRequest(BaseModel):
    """Fields for updating a role (same shape as CreateRoleRequest)."""
    role_name: str = Field(default='这是一个默认的角色名称', min_length=1, max_length=30, alias="roleName")
    actions: List[str] = Field(default=[], description="角色拥有的操作的列表", alias="actions")


class ListUserRequest(BaseModel):
    """Filter and pagination fields for listing users."""
    user_name: Optional[str] = Field(default=None, description="用户名", alias="userName")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


class ListTaskRequest(BaseModel):
    """Filter and pagination fields for listing tasks."""
    team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId")
    task_id: Optional[uuid.UUID] = Field(default=None, description="任务id", alias="taskId")
    task_type: Optional[TaskType] = Field(default=None, description="任务类型", alias="taskType")
    task_status: Optional[TaskStatus] = Field(default=None, description="任务状态", alias="taskStatus")
    page: int = Field(default=1, description="页码")
    page_size: int = Field(default=40, description="每页数量", alias="pageSize")


# (patch metadata kept from the diff; the next file's content follows)
# diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py
# new file mode 100644
# index 0000000..03038f8
# --- /dev/null
# +++ b/data_chain/entities/response_data.py
# @@ -0,0 +1,654 @@
# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+ +from typing import Any, Optional + +from pydantic import BaseModel, Field +import uuid + +from data_chain.entities.enum import ( + TeamType, + ActionType, + Tokenizer, + ParseMethod, + UserStatus, + UserMessageType, + UserMessageStatus, + KnowledgeBaseStatus, + DocParseRelutTopology, + DocumentStatus, + ChunkType, + ChunkParseTopology, + DataSetStatus, + TestingStatus, + SearchMethod, + TaskType, + TaskStatus, + OrderType) + + +class ResponseData(BaseModel): + """基础返回数据结构""" + + code: int = Field(default=200, description="返回码") + message: str = Field(default="", description="返回信息") + result: Any + + +class Team(BaseModel): + """团队信息""" + team_id: uuid.UUID = Field(description="团队ID", alias="teamId") + team_name: str = Field(min_length=1, max_length=30, description="团队名称", alias="teamName") + description: str = Field(max_length=150, description="团队描述") + author_name: str = Field(description="团队创建者的用户ID", alias="authorName") + member_cnt: int = Field(description="团队成员数量", alias="memberCount") + is_public: bool = Field(description="是否为公开团队", alias="isPublic") + created_time: str = Field(description="团队创建时间", alias="createdTime") + + +class ListTeamMsg(BaseModel): + """GET /team 数据结构""" + total: int = Field(default=0, description="总数") + teams: list[Team] = Field(default=[], description="团队列表") + + +class ListTeamResponse(ResponseData): + """GET /team 响应""" + + result: ListTeamMsg = Field(default=ListTeamMsg(), description="团队列表数据结构") + + +class TeamUser(BaseModel): + """团队成员信息""" + user_id: uuid.UUID = Field(description="用户ID", alias="userId") + user_name: str = Field(description="用户名", alias="userName") + role_name: str = Field(description="角色名称", alias="roleName") + + +class ListTeamUserMsg(BaseModel): + """GET /team/usr 数据结构""" + total: int = Field(default=0, description="总数") + team_users: list[TeamUser] = Field(default=[], description="团队成员列表", alias="teamUsers") + + +class ListTeamUserResponse(ResponseData): + result: ListTeamUserMsg = 
Field(default=ListTeamUserMsg(), description="团队成员列表数据结构") + + +class TeamMsg(BaseModel): + """团队信息""" + msg_id: uuid.UUID = Field(description="消息ID", alias="msgId") + author_name: str = Field(description="消息发送者的用户名", alias="authorName") + message: str = Field(description="消息内容") + + +class ListTeamMsgMsg(BaseModel): + """GET /team/msg 数据结构""" + total: int = Field(default=0, description="总数") + team_msgs: list[TeamMsg] = Field(default=[], description="团队消息列表", alias="teamMsgs") + + +class ListTeamMsgResponse(ResponseData): + result: ListTeamMsgMsg = Field(default=ListTeamMsgMsg(), description="团队消息列表数据结构") + + +class CreateTeamResponse(ResponseData): + """POST /team 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="团队ID") + + +class InviteTeamUserResponse(ResponseData): + """POST /team/invitation 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="邀请ID") + + +class JoinTeamResponse(ResponseData): + """POST /team/application 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="申请ID") + + +class UpdateTeamResponse(ResponseData): + """PUT /team 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="团队ID") + + +class UpdateTeamUserRoleResponse(ResponseData): + """PUT /team/usr 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="团队成员ID") + + +class UpdateTeamAuthorResponse(ResponseData): + """PUT /team/author 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="团队ID") + + +class DeleteTeamResponse(ResponseData): + """DELETE /team 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="团队ID") + + +class DeleteTeamUserResponse(ResponseData): + """DELETE /team/usr 响应""" + result: list[uuid.UUID] = Field(default=[], description="团队成员ID列表") + + +class DocumentType(BaseModel): + """文档类型信息""" + doc_type_id: uuid.UUID = Field(description="文档类型ID", alias="docTypeId") + doc_type_name: str = Field(description="文档类型名称", alias="docTypeName") + + +class 
Knowledgebase(BaseModel): + """知识库信息""" + kb_id: uuid.UUID = Field(description="知识库ID", alias="kbId") + kb_name: str = Field(description="知识库名称", min=1, max=20, alias="kbName") + author_name: str = Field(description="知识库创建者的用户名", alias="authorName") + tokenizer: Tokenizer = Field(description="分词器", alias="tokenizer") + embedding_model: str = Field(description="嵌入模型", alias="embeddingModel") + description: str = Field(description="知识库描述", max=150) + doc_cnt: int = Field(description="知识库文档数量", alias="docCnt") + doc_size: int = Field(description="知识库文档大小", alias="docSize") + upload_count_limit: int = Field(description="知识库单次文件上传数量限制", alias="uploadCountLimit") + upload_size_limit: int = Field(description="知识库单次文件上传大小限制", alias="uploadSizeLimit") + default_parse_method: ParseMethod = Field(description="默认解析方法", alias="defaultParseMethod") + default_chunk_size: int = Field(description="默认分块大小", alias="defaultChunkSize") + created_time: str = Field(description="知识库创建时间", alias="createdTime") + doc_types: list[DocumentType] = Field(default=[], description="知识库文档类型列表", alias="docTypes") + + +class TeamKnowledgebase(BaseModel): + """团队知识库信息""" + team_id: uuid.UUID = Field(description="团队ID", alias="teamId") + team_name: str = Field(description="团队名称", alias="teamName") + kb_list: list[Knowledgebase] = Field(default=[], description="知识库列表", alias="kbList") + + +class ListAllKnowledgeBaseMsg(BaseModel): + """GET /kb 数据结构""" + team_knowledge_bases: list[TeamKnowledgebase] = Field(default=[], description="团队知识库列表", alias="teamKnowledgebases") + + +class ListAllKnowledgeBaseResponse(ResponseData): + """GET /kb 响应""" + result: ListAllKnowledgeBaseMsg = Field(default=ListAllKnowledgeBaseMsg(), description="团队知识库列表数据结构") + + +class ListKnowledgeBaseMsg(BaseModel): + total: int = Field(default=0, description="总数") + kb_list: list[Knowledgebase] = Field(default=[], description="知识库列表数据结构", alias="kbList") + + +class ListKnowledgeBaseResponse(ResponseData): + """GET /kb/team 响应""" + 
result: ListKnowledgeBaseMsg = Field(ListKnowledgeBaseMsg()) + + +class ListDocumentTypesResponse(ResponseData): + """GET /kb/doc_type 响应""" + result: list[DocumentType] = Field(default=[], description="文档类型列表数据结构") + + +class Task(BaseModel): + """任务信息""" + op_id: uuid.UUID = Field(description="关联实体ID", alias="opId") + op_name: str = Field(description="关联实体的名称", alias="opName") + task_id: uuid.UUID = Field(description="任务ID", alias="taskId") + task_status: TaskStatus = Field(description="任务状态", alias="taskStatus") + task_type: TaskType = Field(description="任务类型", alias="taskType") + task_completed: float = Field(description="任务完成度", alias="taskCompleted") + created_time: str = Field(description="任务创建时间", alias="createdTime") + + +class CreateKnowledgeBaseResponse(ResponseData): + """POST /kb 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="知识库ID") + + +class ImportKnowledgeBaseResponse(ResponseData): + """POST /kb/import 响应""" + result: list[uuid.UUID] = Field(default=[], description="任务ID") + + +class ExportKnowledgeBaseResponse(ResponseData): + """POST /kb/export 响应""" + result: list[uuid.UUID] = Field(default=[], description="任务ID") + + +class UpdateKnowledgeBaseResponse(ResponseData): + """PUT /kb 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="知识库ID") + + +class DeleteKnowledgeBaseResponse(ResponseData): + """DELETE /kb 响应""" + result: list[uuid.UUID] = Field(default=[], description="知识库ID列表") + + +class Document(BaseModel): + """文档信息""" + doc_id: uuid.UUID = Field(description="文档ID", alias="docId") + doc_name: str = Field(description="文档名称", alias="docName") + doc_type: DocumentType = Field(description="文档类型", alias="docType") + chunk_size: int = Field(description="文档分片大小", alias="chunkSize") + created_time: str = Field(description="文档创建时间", alias="createdTime") + parse_task: Optional[Task] = Field(description="文档任务", alias="docTask") + parse_method: ParseMethod = Field(description="文档解析方法", alias="parseMethod") + 
enabled: bool = Field(description="文档是否启用", alias="enabled") + author_name: str = Field(description="文档创建者的用户名", alias="authorName") + status: DocumentStatus = Field(description="文档状态", alias="status") + + +class ListDocumentMsg(BaseModel): + """GET /doc 数据结构""" + total: int = Field(default=0, description="总数") + documents: list[Document] = Field(default=[], description="文档列表", alias="documents") + + +class ListDocumentResponse(ResponseData): + """GET /doc 响应""" + result: ListDocumentMsg = Field(default=ListDocumentMsg(), description="文档列表数据结构") + + +class GetDocumentReportResponse(ResponseData): + """GET /doc/report 响应""" + result: str = Field(default="", description="文档报告数据结构") + + +class UploadDocumentResponse(ResponseData): + """POST /doc 响应""" + result: list[uuid.UUID] = Field(default=[], description="文档ID列表") + + +class ParseDocumentResponse(ResponseData): + """POST /doc/parse 响应""" + result: list[uuid.UUID] = Field(default=[], description="文档ID列表") + + +class UpdateDocumentResponse(ResponseData): + """PUT /doc 响应""" + result: uuid.UUID = Field(default=None, description="文档ID") + + +class DeleteDocumentResponse(ResponseData): + """DELETE /doc 响应""" + result: list[uuid.UUID] = Field(default=[], description="文档ID列表") + + +class Chunk(BaseModel): + """文档分片信息""" + chunk_id: uuid.UUID = Field(description="分片ID", alias="chunkId") + chunk_type: ChunkType = Field(description="分片类型", alias="chunkType") + text: str = Field(description="分片文本") + + +class ListChunkMsg(BaseModel): + """GET /chunk 数据结构""" + total: int = Field(default=0, description="总数") + chunks: list[Chunk] = Field(default=[], description="分片列表", alias="chunks") + + +class ListChunkResponse(ResponseData): + """GET /chunk 响应""" + result: ListChunkMsg = Field(default=ListChunkMsg(), description="分片列表数据结构") + + +class UpdateChunkResponse(ResponseData): + """PUT /chunk 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="分片ID") + + +class LLM(BaseModel): + llm_id: str = 
Field(description="大模型ID", alias="llmId") + llm_name: str = Field(description="大模型名称", min=1, max=20, alias="llmName") + llm_icon: str = Field(description="大模型图标", alias="llmIcon") + + +class Dataset(BaseModel): + """数据集信息""" + dataset_id: uuid.UUID = Field(description="数据集ID", alias="datasetId") + dataset_name: str = Field(description="数据集名称", min=1, max=20, alias="datasetName") + description: str = Field(description="数据集描述", max=150) + data_cnt: int = Field(description="数据集数据数量", alias="dataCnt") + is_data_cleared: bool = Field(default=False, description="数据集是否进行清洗", alias="isDataCleared") + is_chunk_related: bool = Field(default=False, description="数据集进行上下文关联", alias="isChunkRelated") + llm: Optional[LLM] = Field(default=None, description="生成数据集使用的大模型信息", alias="llm") + generate_task: Optional[Task] = Field(default=None, description="数据集生成任务", alias="generateTask") + score: Optional[float] = Field(description="数据集评分", default=None) + author_name: str = Field(description="数据集创建者的用户名", alias="authorName") + status: DataSetStatus = Field(description="数据集状态", alias="status") + + +class ListDatasetMsg(BaseModel): + """GET /dataset 数据结构""" + total: int = Field(default=0, description="总数") + datasets: list[Dataset] = Field(default=[], description="数据集列表", alias="datasets") + + +class ListDatasetResponse(ResponseData): + """GET /dataset 响应""" + result: ListDatasetMsg = Field(default=ListDatasetMsg(), description="数据集列表数据结构") + + +class Data(BaseModel): + data_id: uuid.UUID = Field(description="数据ID", alias="dataId") + doc_name: str = Field(description="数据关联的文档名称", alias="docName") + question: str = Field(description="数据的问题") + answer: str = Field(description="数据的答案") + chunk: str = Field(description="数据的片段") + + +class ListDataInDatasetMsg(BaseModel): + """GET /dataset/data 数据结构""" + total: int = Field(default=0, description="总数") + datas: list[Data] = Field(default=[], description="数据列表", alias="data") + + +class ListDataInDatasetResponse(ResponseData): + """GET 
/dataset/data 响应""" + result: ListDataInDatasetMsg = Field(default=ListDataInDatasetMsg(), description="数据列表数据结构") + + +class IsDatasetHaveTestingResponse(ResponseData): + """GET /dataset/testing/exist 响应""" + result: bool = Field(default=False, description="数据集是否有测试任务") + + +class ListDatasetTaskResponse(ResponseData): + """GET /dataset/task 响应""" + result: list[Task] = Field(default=[], description="数据集任务列表数据结构") + + +class CreateDatasetResponse(ResponseData): + """POST /dataset 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="数据集生成任务ID") + + +class ImportDatasetResponse(ResponseData): + """POST /dataset/import 响应""" + result: list[uuid.UUID] = Field(default=[], description="任务ID列表") + + +class ExportDatasetResponse(ResponseData): + """POST /dataset/export 响应""" + result: list[uuid.UUID] = Field(default=[], description="任务ID列表") + + +class GenerateDatasetResponse(ResponseData): + """POST /dataset/generate 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="数据集ID") + + +class UpdateDatasetResponse(ResponseData): + """PUT /dataset 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="数据集ID") + + +class UpdateDataResponse(ResponseData): + """PUT /dataset/data 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="数据ID") + + +class DeleteDatasetResponse(ResponseData): + """DELETE /dataset 响应""" + result: list[uuid.UUID] = Field(default=[], description="数据集ID列表") + + +class DeleteDataResponse(ResponseData): + """DELETE /dataset/data 响应""" + result: list[uuid.UUID] = Field(default=[], description="数据ID列表") + + +class Testing(BaseModel): + """测试信息""" + testing_id: uuid.UUID = Field(description="测试ID", alias="testingId") + testing_name: str = Field(description="测试名称", min=1, max=20, alias="testingName") + description: str = Field(description="测试描述", max=150) + llm: Optional[LLM] = Field(default=None, description="测试使用的大模型信息", alias="llm") + search_method: SearchMethod = Field(description="搜索方法", 
alias="searchMethod") + testing_task: Optional[Task] = Field(default=None, description="测试任务", alias="testingTask") + ave_score: float = Field(default=-1, description="综合得分", alias="aveScore") + ave_pre: float = Field(default=-1, description="精确率", alias="avePre") # 精确度 + ave_rec: float = Field(default=-1, description="召回率", alias="aveRec") # 召回率 + ave_fai: float = Field(default=-1, description="忠实值", alias="aveFai") # 忠实值 + ave_rel: float = Field(default=-1, description="可解释性", alias="aveRel") # 可解释性 + ave_lcs: float = Field(default=-1, description="最长公共子串得分", alias="aveLcs") # 最长公共子序列得分 + ave_leve: float = Field(default=-1, description="编辑距离得分", alias="aveLeve") # 编辑距离得分 + ave_jac: float = Field(default=-1, description="杰卡德相似系数", alias="aveJac") # 杰卡德相似系数 + author_name: str = Field(description="测试创建者的用户名", alias="authorName") + status: TestingStatus = Field(description="测试状态", alias="status") + + +class DatasetTesting(BaseModel): + """数据集测试信息""" + dataset_id: uuid.UUID = Field(description="数据集ID", alias="datasetId") + dataset_name: str = Field(description="数据集名称", alias="datasetName") + testings: list[Testing] = Field(default=[], description="测试列表", alias="testings") + + +class ListTestingMsg(BaseModel): + """GET /testing 数据结构""" + total: int = Field(default=0, description="总数") + dataset_testings: list[DatasetTesting] = Field(default=[], description="数据集测试列表", alias="datasetTestings") + + +class ListTestingResponse(ResponseData): + """GET /testing 响应""" + result: ListTestingMsg = Field(default=ListTestingMsg(), description="测试列表数据结构") + + +class TestCase(BaseModel): + """测试用例信息""" + test_case_id: uuid.UUID = Field(description="测试用例ID", alias="testCaseId") + question: str = Field(description="问题") + answer: str = Field(description="标准答案") + llm_answer: str = Field(description="大模型的回答") + related_chunk: str = Field(description="检索到的片段") + doc_name: str = Field(description="来源文档") + score: float = Field(description="综合得分") + pre: float = Field(description="精确率") # 
精确度 + rec: float = Field(description="召回率") # 召回率 + fai: float = Field(description="忠实值") # 忠实值 + rel: float = Field(description="可解释性") # 可解释性 + lcs: float = Field(description="最长公共子串得分") # 最长公共子序列得分 + leve: float = Field(description="编辑距离得分") # 编辑距离得分 + jac: float = Field(description="杰卡德相似系数") # 杰卡德相似系数 + + +class TestingTestCase(BaseModel): + """GET /testing/testcase 数据结构""" + ave_score: float = Field(description="平均综合得分", alias="aveScore") + ave_pre: float = Field(description="平均精确率", alias="avePre") + ave_rec: float = Field(description="平均召回率", alias="aveRec") + ave_fai: float = Field(description="平均忠实值", alias="aveFai") + ave_rel: float = Field(description="平均可解释性", alias="aveRel") + ave_lcs: float = Field(description="平均最长公共子串得分", alias="aveLcs") + ave_leve: float = Field(description="平均编辑距离得分", alias="aveLeve") + ave_jac: float = Field(description="平均杰卡德相似系数", alias="aveJac") + test_cases: list[TestCase] = Field(default=[], description="测试用例列表", alias="testCases") + + +class ListTestCaseResponse(ResponseData): + """GET /testing/testcase 响应""" + result: TestingTestCase = Field(default=TestingTestCase(), description="测试用例列表数据结构") + + +class CreateTestingResponsing(ResponseData): + """POST /testing 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="测试ID") + + +class RunTestingResponse(ResponseData): + """POST /testing/run 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="测试ID") + + +class UpdateTestingResponse(ResponseData): + """PUT /testing 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="测试ID") + + +class DeleteTestingResponse(ResponseData): + """DELETE /testing 响应""" + result: list[uuid.UUID] = Field(default=[], description="测试ID列表") + + +class action(BaseModel): + """操作信息""" + action_name: str = Field(description="操作名称", min=1, max=20, alias="actionName") + action: str = Field(description="操作", min=1, max=20) + is_used: bool = Field(description="是否启用", alias="isUsed") + + +class 
TypeAction(BaseModel): + """不同类别的类别操作""" + action_type: ActionType = Field(description="操作类型", alias="actionType") + actions: list[action] = Field(default=[], description="操作列表", alias="actions") + + +class ListActionMsg(BaseModel): + """GET /role/action 数据结构""" + type_actions: list[TypeAction] = Field(default=[], description="操作类型列表", alias="actionTypes") + + +class ListActionResponse(ResponseData): + result: ListActionMsg = Field(default=ListActionMsg(), description="操作列表数据结构") + + +class role(BaseModel): + """角色信息""" + role_id: uuid.UUID = Field(description="角色ID", alias="roleId") + role_name: str = Field(description="角色名称", min=1, max=20, alias="roleName") + type_actions: list[TypeAction] = Field(default=[], description="操作类型列表", alias="typeActions") + + +class ListRoleMsg(BaseModel): + """GET /role 数据结构""" + roles: list[role] = Field(default=[], description="角色列表", alias="roles") + + +class ListRoleResponse(ResponseData): + """GET /role 响应""" + result: ListRoleMsg = Field(default=ListRoleMsg(), description="角色列表数据结构") + + +class CreateRoleResponse(ResponseData): + """POST /role 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="角色ID") + + +class UpdateRoleResponse(ResponseData): + """PUT /role 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="角色ID") + + +class DeleteRoleResponse(ResponseData): + """DELETE /role 响应""" + result: list[uuid.UUID] = Field(default=[], description="角色ID列表") + + +class UserMsg(BaseModel): + """用户消息""" + team_id: uuid.UUID = Field(description="团队ID", alias="teamId") + msg_id: uuid.UUID = Field(description="消息ID", alias="msgId") + sender_id: uuid.UUID = Field(description="发送者ID", alias="senderId") + sender_name: str = Field(description="发送者名称", alias="senderName") + receiver_id: uuid.UUID = Field(description="接收者ID", alias="receiverId") + receiver_name: str = Field(description="接收者名称", alias="receiverName") + msg_type: UserMessageType = Field(description="消息类型", alias="msgType") + msg_status: 
UserMessageStatus = Field(description="消息状态", alias="msgStatus") + created_time: str = Field(description="创建时间", alias="createdTime") + + +class ListUserMessageMsg(BaseModel): + """GET /usr_msg 数据结构""" + total: int = Field(default=0, description="总数") + user_messages: list[UserMsg] = Field(default=[], description="用户消息列表", alias="userMessages") + + +class ListUserMessageResponse(ResponseData): + result: ListUserMessageMsg = Field(default=ListUserMessageMsg(), description="用户消息列表数据结构") + + +class UpdateUserMessageResponse(ResponseData): + """PUT /usr_msg 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="消息ID") + + +class DeleteUserMessageResponse(ResponseData): + """DELETE /usr_msg 响应""" + result: list[uuid.UUID] = Field(default=[], description="消息ID列表") + + +class User(BaseModel): + """用户数据结构""" + user_sub: str = Field(description="用户id") + user_name: str = Field(description="用户名称") + + +class ListUserMsg(BaseModel): + """GET /user 数据结构""" + users: list[User] = Field(default=[], description="用户列表") + + +class ListUserResponse(ResponseData): + result: ListUserMsg = Field(ListUserMsg(), description="大模型列表数据结构") + + +class ListLLMMsg(BaseModel): + """GET /other/llm 数据结构""" + llms: list[LLM] = Field(default=[], description="大模型列表", alias="llms") + + +class ListLLMResponse(ResponseData): + """GET /other/llm 响应""" + result: ListLLMMsg = Field(default=ListLLMMsg(), description="大模型列表数据结构") + + +class Entity(BaseModel): + name: str = Field(description="实体名称") + description: str = Field(description="实体描述") + + +class ListEmbeddingResponse(ResponseData): + """GET /other/embedding 数据结构""" + result: list[str] = Field(default=[], description="向量化模型的列表数据结构") + + +class ListTokenizerResponse(ResponseData): + """GET /other/tokenizer 响应""" + result: list[str] = Field(default=[], description="分词器的列表数据结构") + + +class ListParseMethodResponse(ResponseData): + """"GET /other/parse_method 响应""" + result: list[str] = Field(default=[], description="解析方法的列表数据结构") + + 
+class ListSearchMethodResponse(ResponseData): + """GET /other/search_method 响应""" + result: list[str] = Field(default=[], description="搜索方法的列表数据结构") + + +class ListTaskMsg(BaseModel): + """GET /task 数据结构""" + total: int = Field(default=0, description="总数") + tasks: list[Task] = Field(default=[], description="任务列表", alias="tasks") + + +class ListTaskResponse(ResponseData): + """GET /task 响应""" + result: ListTaskMsg = Field(default=ListTaskMsg(), description="任务列表数据结构") + + +class GetTaskReportResponse(ResponseData): + """GET /task/report 响应""" + result: str = Field(default='', description="任务报告") + + +class DeleteTaskByIdResponse(ResponseData): + """DELETE /task/one 响应""" + result: Optional[uuid.UUID] = Field(default=None, description="任务ID") + + +class DeleteTaskByTypeResponse(ResponseData): + """DELETE /task/all 响应""" + result: list[uuid.UUID] = Field(default=[], description="任务ID列表") diff --git a/data_chain/exceptions/err_code.py b/data_chain/exceptions/err_code.py deleted file mode 100644 index e412882..0000000 --- a/data_chain/exceptions/err_code.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from enum import IntEnum - - -# TODO: 补充自定义错误信息 -class ErrorCode(IntEnum): - CREATE_KNOWLEDGE_BASE_ERROR = 10001 - UPDATE_KNOWLEDGE_BASE_ERROR = 10002 - LIST_KNOWLEDGE_BASE_ERROR = 10003 - DELETE_KNOWLEDGE_BASE_ERROR = 10004 - IMPORT_KNOWLEDGE_BASE_ERROR = 10005 - SAVE_KNOWLEDGE_BASE_ERROR = 10006 - EXPORT_KNOWLEDGE_BASE_ERROR = 10007 - QUERY_IMPORT_KNOWLEDGE_BASE_STATUS_ERROR = 10008 - LIST_KNOWLEDGE_BASE_TASK_ERROR = 10009 - STOP_KNOWLEDGE_BASE_TASK_ERROR = 10010 - - CREATE_DOCUMENT_ERROR = 20001 - RENAME_DOCUMENT_ERROR = 20002 - LIST_DOCUMENT_ERROR = 20003 - RUN_DOCUMENT_ERROR = 20004 - SWITCH_DOCUMENT_ERROR = 20005 - DELETE_DOCUMENT_ERROR = 20006 - UPLOAD_DOCUMENT_ERROR = 20007 - DOWNLOAD_DOCUMENT_ERROR = 20008 - LIST_DOCUMENT_TASK_ERROR = 20009 - STOP_DOCUMENT_TASK_ERROR = 20010 - - CREATE_CHUNK_ERROR = 30001 - LIST_CHUNK_ERROR = 30002 - SWITCH_CHUNK_ERROR = 30003 - - CREATE_MODEL_ERROR = 40001 - UPDATE_MODEL_ERROR = 40002 - LIST_MODEL_ERROR = 40003 diff --git a/data_chain/exceptions/exception.py b/data_chain/exceptions/exception.py deleted file mode 100644 index 8e8ac7b..0000000 --- a/data_chain/exceptions/exception.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -class KnowledgeBaseException(Exception): - ... - - -class DocumentException(Exception): - ... - - -class MinioException(Exception): - ... - - -class ChunkException(Exception): - ... - -class TaskException(Exception): - ... -class ModelException(Exception): - ... 
\ No newline at end of file diff --git a/data_chain/llm/icon/ollama.svg b/data_chain/llm/icon/ollama.svg new file mode 100644 index 0000000..cc887e3 --- /dev/null +++ b/data_chain/llm/icon/ollama.svg @@ -0,0 +1 @@ +Ollama \ No newline at end of file diff --git a/data_chain/apps/base/model/llm.py b/data_chain/llm/llm.py similarity index 68% rename from data_chain/apps/base/model/llm.py rename to data_chain/llm/llm.py index eccc32c..7fd4e59 100644 --- a/data_chain/apps/base/model/llm.py +++ b/data_chain/llm/llm.py @@ -11,6 +11,12 @@ from data_chain.logger.logger import logger as logging class LLM: def __init__(self, openai_api_key, openai_api_base, model_name, max_tokens, request_timeout=60, temperature=0.1): + self.openai_api_key = openai_api_key + self.openai_api_base = openai_api_base + self.model_name = model_name + self.max_tokens = max_tokens + self.request_timeout = request_timeout + self.temperature = temperature self.client = ChatOpenAI(model_name=model_name, openai_api_base=openai_api_base, openai_api_key=openai_api_key, @@ -26,9 +32,14 @@ class LLM: return chat async def nostream(self, chat, system_call, user_call): - chat = self.assemble_chat(chat, system_call, user_call) - response = await self.client.ainvoke(chat) - content = re.sub(r'.*?\n\n', '', response.content, flags=re.DOTALL) + try: + chat = self.assemble_chat(chat, system_call, user_call) + response = await self.client.ainvoke(chat) + content = re.sub(r'.*?\n\n', '', response.content, flags=re.DOTALL) + except Exception as e: + err = f"[LLM] 非流式输出异常: {e}" + logging.error("[LLM] %s", err) + return '' return content async def data_producer(self, q: asyncio.Queue, history, system_call, user_call): @@ -38,10 +49,9 @@ class LLM: await q.put(frame.content) except Exception as e: await q.put(None) - import traceback - logging.error(f"Error in data producer due to: {traceback.format_exc()}") - logging.error(f"Error in data producer due to: {e}") - return + err = f"[LLM] 流式输出生产者任务异常: {e}" + 
logging.error("[LLM] %s", err) + raise e await q.put(None) async def stream(self, chat, system_call, user_call): @@ -51,9 +61,9 @@ class LLM: # 启动生产者任务 producer_task = asyncio.create_task(self.data_producer(q, chat, system_call, user_call)) first_token_reach = False - enc = tiktoken.encoding_for_model("gpt-4") - input_tokens=len(enc.encode(system_call)) - output_tokens=0 + enc = tiktoken.encoding_for_model("gpt-4") + input_tokens = len(enc.encode(system_call)) + output_tokens = 0 while True: data = await q.get() if data is None: @@ -61,13 +71,13 @@ class LLM: if not first_token_reach: first_token_reach = True logging.info(f"大模型回复第一个字耗时 = {time.time() - st}") - output_tokens+=len(enc.encode(data)) + output_tokens += len(enc.encode(data)) yield "data: " + json.dumps( {'content': data, - 'input_tokens':input_tokens, - 'output_tokens':output_tokens + 'input_tokens': input_tokens, + 'output_tokens': output_tokens }, ensure_ascii=False - ) + '\n\n' + ) + '\n\n' await asyncio.sleep(0.03) # 使用异步 sleep yield "data: [DONE]" diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index 638070e..aceb28a 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -2,539 +2,260 @@ from sqlalchemy import select, update, func, text, or_, and_ from typing import List, Tuple, Dict, Optional import uuid -from data_chain.logger.logger import logger as logging +from data_chain.entities.enum import DocumentStatus, ChunkStatus, Tokenizer +from data_chain.entities.request_data import ListChunkRequest from data_chain.config.config import config -from data_chain.stores.postgres.postgres import PostgresDB, ChunkEntity, ChunkLinkEntity, DocumentEntity, TemporaryChunkEntity -from data_chain.models.service import ChunkDTO -from data_chain.exceptions.exception import ChunkException +from data_chain.stores.database.database import DocumentEntity, ChunkEntity, DataBase +from data_chain.manager.knowledge_manager import 
KnowledgeBaseManager +from data_chain.logger.logger import logger as logging class ChunkManager(): @staticmethod - async def insert_chunk(chunk_entity: ChunkEntity) -> ChunkEntity: - async with await PostgresDB.get_session() as session: - session.add(chunk_entity) - await session.commit() - await session.refresh(chunk_entity) - return chunk_entity - - @staticmethod - async def insert_chunks(chunk_entity_list: List[ChunkEntity]) -> List[ChunkEntity]: - async with await PostgresDB.get_session() as session: - try: - session.add_all(chunk_entity_list) + async def add_chunk(chunk: ChunkEntity) -> ChunkEntity: + """添加文档""" + try: + async with await DataBase.get_session() as session: + session.add(chunk) await session.commit() - for chunk_entity in chunk_entity_list: - await session.refresh(chunk_entity) - return chunk_entity_list - except Exception as e: - logging.error(f'Error saving chunk entities due to: {e}') + return chunk + except Exception as e: + err = "添加文档解析结果失败" + logging.exception("[ChunkManager] %s", err) @staticmethod - async def select_by_chunk_id(chunk_id: uuid.UUID) -> Optional[ChunkEntity]: - async with await PostgresDB.get_session() as session: - stmt = select(ChunkEntity).where(ChunkEntity.id == chunk_id) - result = await session.execute(stmt) - chunk_entity = result.scalars().first() - return chunk_entity + async def add_chunks(chunks: List[ChunkEntity]) -> List[ChunkEntity]: + """批量添加文档""" + try: + async with await DataBase.get_session() as session: + session.add_all(chunks) + await session.commit() + return chunks + except Exception as e: + err = "批量添加文档解析结果失败" + logging.exception("[ChunkManager] %s", err) @staticmethod - async def select_by_chunk_ids(chunk_ids: List[uuid.UUID]) -> Optional[List[ChunkEntity]]: - async with await PostgresDB.get_session() as session: - stmt = select(ChunkEntity).where(ChunkEntity.id.in_(chunk_ids)) - result = await session.execute(stmt) - chunk_entity_list = result.scalars().all() - return chunk_entity_list + async 
def get_chunk_by_chunk_id(chunk_id: uuid.UUID) -> Optional[ChunkEntity]: + """根据文档ID查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(ChunkEntity) + .where(ChunkEntity.id == chunk_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def fetch_surrounding_context( - document_id: uuid.UUID, global_offset: int, expand_method='all', max_tokens: int = 1024, max_rd_cnt: int = 50): + async def get_chunk_cnt_by_doc_ids(doc_ids: List[uuid.UUID]) -> int: + """根据文档ID查询文档解析结果""" try: - if max_tokens <= 0: - return [] - results = [] - async with await PostgresDB.get_session() as session: - tokens = 0 - para_cnt = 0 - global_offset_set = set([global_offset]) - result = await session.execute( - select(func.min(ChunkEntity.global_offset), func.max(ChunkEntity.global_offset)). - join(DocumentEntity). - where( - ChunkEntity.document_id == document_id - ) + async with await DataBase.get_session() as session: + stmt = ( + select(func.count()) + .where(ChunkEntity.doc_id.in_(doc_ids)) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) ) - min_global_offset, max_global_offset = result.one() - if expand_method == 'nex': - min_global_offset = global_offset - if expand_method == 'pre': - max_global_offset = global_offset - tokens_sub = 0 - mv_flag = None - rd_it = 0 - chunk_entity_list = ( - await session.execute( - select(ChunkEntity). 
- where( - and_( - ChunkEntity.document_id == document_id, - ChunkEntity.global_offset >= global_offset-max_rd_cnt, - ChunkEntity.global_offset <= global_offset+max_rd_cnt, - ChunkEntity.enabled == True - ) - ) - ) - ).scalars().all() - global_offset_set_dict = {} - for chunk_entity in chunk_entity_list: - global_offset_set_dict[chunk_entity.global_offset] = ( - chunk_entity.id, - chunk_entity.document_id, - chunk_entity.global_offset, - chunk_entity.tokens, - chunk_entity.text) - while tokens < max_tokens and (min(global_offset_set) > min_global_offset or max(global_offset_set) < - max_global_offset) and rd_it < max_rd_cnt: - result = None - new_global_offset = None - if tokens_sub <= 0 and min(global_offset_set) > min_global_offset: - mv_flag = True - new_global_offset = min(global_offset_set)-1 - elif tokens_sub > 0 and max(global_offset_set) < max_global_offset: - mv_flag = False - new_global_offset = max(global_offset_set)+1 - elif rd_it % 2 == 0 and min(global_offset_set) > min_global_offset: - mv_flag = True - new_global_offset = min(global_offset_set)-1 - elif max(global_offset_set) < max_global_offset: - mv_flag = False - new_global_offset = max(global_offset_set)+1 - else: - break - result = global_offset_set_dict.get(new_global_offset, None) - global_offset_set.add(new_global_offset) - if result: - tokens += result[3] - para_cnt += 1 - results.append(result) - if mv_flag: - tokens_sub += result[3] - else: - tokens_sub -= result[3] - rd_it += 1 - return results + result = await session.execute(stmt) + return result.scalar() except Exception as e: - logging.error(f"Fetch surrounding context failed due to: {e}") - return [] + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def delete_by_document_ids(document_ids: List[str]) -> None: - async with await PostgresDB.get_session() as session: - stmt = await session.execute( - select(ChunkEntity).where(ChunkEntity.document_id.in_(document_ids)) - ) - entities 
= stmt.scalars().all() - for entity in entities: - await session.delete(entity) - await session.commit() + async def get_chunk_tokens_by_doc_ids(doc_ids: List[uuid.UUID]) -> int: + """根据文档ID查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(func.sum(ChunkEntity.tokens)) + .where(ChunkEntity.doc_id.in_(doc_ids)) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) + ) + result = await session.execute(stmt) + return result.scalar() + except Exception as e: + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e - @staticmethod - async def select_by_page(params, page_number, page_size) -> Tuple[List[ChunkDTO], int]: + async def list_chunk( + req: ListChunkRequest, + ) -> Tuple[int, List[ChunkEntity]]: + """根据文档ID查询文档解析结果""" try: - async with await PostgresDB.get_session() as session: - stmt = select(ChunkEntity) - if 'document_id' in params: - stmt = stmt.where(ChunkEntity.document_id == params['document_id']) - if 'text' in params: - stmt = stmt.where(ChunkEntity.text.ilike(f"%{params['text']}%")) - if 'types' in params: - types = params['types'] - conditions = [ChunkEntity.type.ilike(f"%{type}%") for type in types] - stmt = stmt.filter(or_(*conditions)) - # 获取总数 + async with await DataBase.get_session() as session: + stmt = ( + select(ChunkEntity) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) + ) + if req.doc_id: + stmt = stmt.where(ChunkEntity.doc_id == req.doc_id) + if req.text: + stmt = stmt.where(ChunkEntity.text.ilike(f"%{req.text}%")) + if req.type: + stmt = stmt.where(ChunkEntity.type == req.type) count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() - - # 添加排序、偏移量和限制 - stmt = stmt.order_by(ChunkEntity.global_offset).offset((page_number - 1) * page_size).limit(page_size) - - results = await session.execute(stmt) - chunk_entity_list = results.scalars().all() - return (chunk_entity_list, total) + stmt = 
stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + stmt = stmt.order_by(ChunkEntity.global_offset.desc()) + result = await session.execute(stmt) + chunk_entities = result.scalars().all() + return total, chunk_entities except Exception as e: - logging.error(f"Select by page error: {e}") - raise ChunkException(f"Select by page ({params}) error.") + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def update(id: str, update_dict: Dict) -> List[str]: + async def list_all_chunk_by_doc_id(doc_id: uuid.UUID) -> List[ChunkEntity]: + """根据文档ID查询文档解析结果""" try: - async with await PostgresDB.get_session() as session: - # 使用update方法进行更新操作 - await session.execute( - update(ChunkEntity). - where(ChunkEntity.id == id). - values(**update_dict) + async with await DataBase.get_session() as session: + stmt = ( + select(ChunkEntity) + .where(and_(ChunkEntity.doc_id == doc_id, + ChunkEntity.status != ChunkStatus.DELETED.value)) + .order_by(ChunkEntity.global_offset) ) - await session.commit() - return ["success"] + result = await session.execute(stmt) + return result.scalars().all() except Exception as e: - logging.error(f"Update chunk status ({update_dict}) error: {e}") - raise ChunkException(f"Update chunk status ({update_dict}) error.") + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def find_top_k_similar_chunks(kb_id, content, topk=3, banned_ids=[]): + async def get_topk_chunk_by_kb_ids_vector( + kb_id: uuid.UUID, doc_ids: list[uuid.UUID], vector: List[float], + topk: int, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + """根据知识库ID和向量查询文档解析结果""" try: - if topk <= 0: - return [] - async with await PostgresDB.get_session() as session: - # 构建SQL查询语句 - if config['DATABASE_TYPE'] == 'postgres': - if banned_ids: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - chunk c - JOIN - document d ON 
c.document_id = d.id - WHERE - c.id NOT!=ANY(:banned_ids) AND - c.kb_id = :kb_id AND - c.enabled = true AND - d.enabled = true AND - to_tsvector(:language, c.text) @@ plainto_tsquery(:language, :content) - ORDER BY - ts_rank_cd(to_tsvector(:language, c.text), plainto_tsquery(:language, :content)) DESC - LIMIT :topk; - """) - else: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - chunk c - JOIN - document d ON c.document_id = d.id - WHERE - c.kb_id = :kb_id AND - c.enabled = true AND - d.enabled = true AND - to_tsvector(:language, c.text) @@ plainto_tsquery(:language, :content) - ORDER BY - ts_rank_cd(to_tsvector(:language, c.text), plainto_tsquery(:language, :content)) DESC - LIMIT :topk; - """) - elif config['DATABASE_TYPE'] == 'opengauss': - if banned_ids: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - chunk c - JOIN - document d ON c.document_id = d.id - WHERE - c.id NOT!=ANY(:banned_ids) AND - c.kb_id = :kb_id AND - c.enabled = true AND - d.enabled = true AND - to_tsvector('chparser', c.text) @@ plainto_tsquery('chparser', :content) - ORDER BY - ts_rank_cd(to_tsvector('chparser', c.text), plainto_tsquery('chparser', :content)) DESC - LIMIT :topk; - """) - else: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - chunk c - JOIN - document d ON c.document_id = d.id - WHERE - c.kb_id = :kb_id AND - c.enabled = true AND - d.enabled = true AND - to_tsvector('chparser', c.text) @@ plainto_tsquery('chparser', :content) - ORDER BY - ts_rank_cd(to_tsvector('chparser', c.text), plainto_tsquery('chparser', :content)) DESC - LIMIT :topk; - """) - if config['DATABASE_TYPE'] == 'postgres': - # 安全地绑定参数 - params = { - 'banned_ids': banned_ids, - 'language': 'zhparser', - 'kb_id': kb_id, - 'content': content, - 'topk': topk, - } - elif config['DATABASE_TYPE'] == 'opengauss': - params = { - 'banned_ids': banned_ids, - 
'kb_id': kb_id, - 'content': content, - 'topk': topk, - } - result = await session.execute(query, params) - return result.all() + async with await DataBase.get_session() as session: + stmt = ( + select(ChunkEntity) + .join(DocumentEntity, + DocumentEntity.id == ChunkEntity.doc_id + ) + .join(DocumentEntity.id.in_(doc_ids)) + .where(DocumentEntity.enabled == True) + .where(DocumentEntity.status != DocumentStatus.DELETED.value) + .where(ChunkEntity.kb_id == kb_id) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) + .where(ChunkEntity.id.notin_(banned_id)) + .order_by(ChunkEntity.text_vector.cosine_distance(vector).desc()) + .limit(topk) + ) + result = await session.execute(stmt) + chunk_entities = result.scalars().all() + return chunk_entities except Exception as e: - logging.error(f"Find top k similar chunks failed due to: {e}") - return [] - + err = "根据知识库ID和向量查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e -class ChunkLinkManager(): @staticmethod - async def insert_chunk_link(chunk_link_entity: ChunkLinkEntity) -> ChunkLinkEntity: - async with await PostgresDB.get_session() as session: - try: - session.add(chunk_link_entity) - await session.commit() - await session.refresh(chunk_link_entity) - return chunk_link_entity - except Exception as e: - logging.error(f"Insert chunk link failed due to: {e}") - return None - - @staticmethod - async def insert_chunk_links(chunk_link_entity_list: List[ChunkLinkEntity]) -> List[ChunkLinkEntity]: - async with await PostgresDB.get_session() as session: - try: - session.add_all(chunk_link_entity_list) - await session.commit() - for chunk_link_entity in chunk_link_entity_list: - await session.refresh(chunk_link_entity) - return chunk_link_entity_list - - except Exception as e: - logging.error(f'Insert chunk link entities failed due to: {e}') - - -class TemporaryChunkManager(): - @staticmethod - async def insert_temprorary_chunk(temprorary_chunk_entity: TemporaryChunkEntity) -> TemporaryChunkEntity: - 
async with await PostgresDB.get_session() as session: - try: - session.add(temprorary_chunk_entity) - await session.commit() - await session.refresh(temprorary_chunk_entity) - return temprorary_chunk_entity - except Exception as e: - logging.error(f'Insert temprorary chunk entity failed due to: {e}') - - @staticmethod - async def insert_temprorary_chunks( - temprorary_chunk_entity_list: List[TemporaryChunkEntity]) -> List[TemporaryChunkEntity]: - async with await PostgresDB.get_session() as session: - try: - session.add_all(temprorary_chunk_entity_list) - await session.commit() - for temprorary_chunk_entity in temprorary_chunk_entity_list: - await session.refresh(temprorary_chunk_entity) - return temprorary_chunk_entity_list - except Exception as e: - logging.error(f'Insert temporary chunks entities failed due to: {e}') - - @staticmethod - async def delete_by_temporary_document_ids(document_ids: List[str]) -> None: - async with await PostgresDB.get_session() as session: - try: - stmt = await session.execute( - select(TemporaryChunkEntity).where(ChunkEntity.document_id.in_(document_ids)) + async def get_topk_chunk_by_kb_id_keyword( + kb_id: uuid.UUID, doc_ids: list[uuid.UUID], query: str, + topk: int, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + """根据知识库ID和关键词查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id) + tokenizer = '' + if kb_entity.tokenizer == Tokenizer.ZH.value: + if 'opengauss' in config['DATABASE_URL']: + tokenizer = 'chparser' + else: + tokenizer = 'zhparser' + elif kb_entity.tokenizer == Tokenizer.EN.value: + tokenizer = 'english' + stmt = ( + select(ChunkEntity) + .join(DocumentEntity, + DocumentEntity.id == ChunkEntity.doc_id + ) + .where(DocumentEntity.id.in_(doc_ids)) + .where(DocumentEntity.enabled == True) + .where(DocumentEntity.status != DocumentStatus.DELETED.value) + .where(ChunkEntity.kb_id == kb_id) + .where(ChunkEntity.status != 
ChunkStatus.DELETED.value) + .where(ChunkEntity.id.notin_(banned_id)) + .order_by( + func.ts_rank_cd( + func.to_tsvector(tokenizer, ChunkEntity.text), + func.plainto_tsquery(tokenizer, query) + ).desc() + ) + .limit(topk) ) - entities = stmt.scalars().all() - for entity in entities: - await session.delete(entity) - await session.commit() - except Exception as e: - logging.error(f'Delete temporary chunks entities failed due to: {e}') + result = await session.execute(stmt) + chunk_entities = result.scalars().all() + return chunk_entities + except Exception as e: + err = "根据知识库ID和关键词查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def select_by_temporary_chunk_ids(temporary_chunk_ids: List[uuid.UUID]) -> Optional[List[TemporaryChunkEntity]]: - async with await PostgresDB.get_session() as session: - try: - stmt = select(TemporaryChunkEntity).where(TemporaryChunkEntity.id.in_(temporary_chunk_ids)) + async def fetch_surrounding_chunk_by_doc_id_and_global_offset( + doc_id: uuid.UUID, global_offset: int, + topk: int = 50) -> List[ChunkEntity]: + """根据文档ID和全局偏移量查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(ChunkEntity) + .where(and_(ChunkEntity.doc_id == doc_id, + ChunkEntity.status != ChunkStatus.DELETED.value)) + .where(or_(ChunkEntity.global_offset >= global_offset - topk, + ChunkEntity.global_offset <= global_offset + topk)) + .order_by(ChunkEntity.global_offset) + ) result = await session.execute(stmt) - temporary_chunk_entity_list = result.scalars().all() - return temporary_chunk_entity_list - except Exception as e: - logging.error(f'Select temporary chunks entities failed due to: {e}') - return None + chunk_entities = result.scalars().all() + return chunk_entities + except Exception as e: + err = "根据文档ID和全局偏移量查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e @staticmethod - async def find_top_k_similar_chunks(document_ids: List[uuid.UUID], content: str, topk=3): 
+ async def update_chunk_by_doc_id(doc_id: uuid.UUID, chunk_dict: Dict[str, str]) -> bool: + """根据文档ID更新文档解析结果""" try: - if topk <= 0: - return [] - async with await PostgresDB.get_session() as session: - # 构建SQL查询语句 - if config['DATABASE_TYPE'] == 'postgres': - if document_ids: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - temporary_chunk c - JOIN - temporary_document d ON c.document_id = d.id - WHERE - c.document_id=ANY(:document_ids) AND - d.status!='deleted' AND - to_tsvector(:language, c.text) @@ plainto_tsquery(:language, :content) - ORDER BY - ts_rank_cd(to_tsvector(:language, c.text), plainto_tsquery(:language, :content)) DESC - LIMIT :topk; - """) - else: - return [] - elif config['DATABASE_TYPE'] == 'opengauss': - if document_ids: - query = text(""" - SELECT - c.id, - c.document_id, - c.global_offset, - c.tokens, - c.text - FROM - temporary_chunk c - JOIN - temporary_document d ON c.document_id = d.id - WHERE - c.document_id=ANY(:document_ids) AND - d.status!='deleted' AND - to_tsvector('chparser', c.text) @@ plainto_tsquery('chparser', :content) - ORDER BY - ts_rank_cd(to_tsvector('chparser', c.text), plainto_tsquery('chparser', :content)) DESC - LIMIT :topk; - """) - else: - return [] - if config['DATABASE_TYPE'] == 'postgres': - # 安全地绑定参数 - params = { - 'document_ids': document_ids, - 'language': 'zhparser', - 'content': content, - 'topk': topk, - } - elif config['DATABASE_TYPE'] == 'opengauss': - # 安全地绑定参数 - params = { - 'document_ids': document_ids, - 'content': content, - 'topk': topk, - } - result = await session.execute(query, params) - return result.all() + async with await DataBase.get_session() as session: + stmt = ( + update(ChunkEntity) + .where(ChunkEntity.doc_id == doc_id) + .values(**chunk_dict) + ) + await session.execute(stmt) + await session.commit() + return True except Exception as e: - logging.error(f"Find top k similar temporary chunks failed due to: {e}") - return [] + err = 
"根据文档ID更新文档解析结果失败" + logging.exception("[ChunkManager] %s", err) @staticmethod - async def fetch_surrounding_temporary_context( - document_id: uuid.UUID, global_offset: int, expand_method='all', max_tokens: int = 1024, max_rd_cnt: int = 50): + async def update_chunk_by_chunk_id(chunk_id: uuid.UUID, chunk_dict: Dict[str, str]) -> bool: + """根据文档ID更新文档解析结果""" try: - if max_tokens <= 0: - return [] - results = [] - async with await PostgresDB.get_session() as session: - tokens = 0 - para_cnt = 0 - global_offset_set = set([global_offset]) - result = await session.execute( - select(func.min(TemporaryChunkEntity.global_offset), func.max(TemporaryChunkEntity.global_offset)). - where( - TemporaryChunkEntity.document_id == document_id - ) + async with await DataBase.get_session() as session: + stmt = ( + update(ChunkEntity) + .where(ChunkEntity.id == chunk_id) + .values(**chunk_dict) ) - min_global_offset, max_global_offset = result.one() - if expand_method == 'nex': - min_global_offset = global_offset - if expand_method == 'pre': - max_global_offset = global_offset - tokens_sub = 0 - mv_flag = None - rd_it = 0 - temporary_chunk_entity_list = ( - await session.execute( - select(TemporaryChunkEntity). 
- where( - and_( - TemporaryChunkEntity.document_id == document_id, - TemporaryChunkEntity.global_offset >= global_offset-max_rd_cnt, - TemporaryChunkEntity.global_offset <= global_offset+max_rd_cnt - ) - ) - ) - ).scalars().all() - global_offset_set_dict = {} - for temporary_chunk_entity in temporary_chunk_entity_list: - global_offset_set_dict[temporary_chunk_entity.global_offset] = ( - temporary_chunk_entity.id, - temporary_chunk_entity.document_id, - temporary_chunk_entity.global_offset, - temporary_chunk_entity.tokens, - temporary_chunk_entity.text) - while tokens < max_tokens and (min(global_offset_set) > min_global_offset or max(global_offset_set) < - max_global_offset) and rd_it < max_rd_cnt: - result = None - new_global_offset = None - if tokens_sub <= 0 and min(global_offset_set) > min_global_offset: - mv_flag = True - new_global_offset = min(global_offset_set)-1 - elif tokens_sub > 0 and max(global_offset_set) < max_global_offset: - mv_flag = False - new_global_offset = max(global_offset_set)+1 - elif rd_it % 2 == 0: - if min(global_offset_set) > min_global_offset: - mv_flag = True - new_global_offset = min(global_offset_set)-1 - elif max(global_offset_set) < max_global_offset: - mv_flag = False - new_global_offset = max(global_offset_set)+1 - - elif rd_it % 2 != 0: - if max(global_offset_set) < max_global_offset: - mv_flag = False - new_global_offset = max(global_offset_set)+1 - elif min(global_offset_set) > min_global_offset: - mv_flag = True - new_global_offset = min(global_offset_set)-1 - else: - break - result = global_offset_set_dict.get(new_global_offset, None) - global_offset_set.add(new_global_offset) - if result: - tokens += result[3] - para_cnt += 1 - results.append(result) - if mv_flag: - tokens_sub += result[3] - else: - tokens_sub -= result[3] - rd_it += 1 - return results + await session.execute(stmt) + await session.commit() + return True except Exception as e: - logging.error(f"Fetch surrounding temporary context failed due to: {e}") - 
return [] + err = "根据文档ID更新文档解析结果失败" + logging.exception("[ChunkManager] %s", err) diff --git a/data_chain/manager/dataset_manager.py b/data_chain/manager/dataset_manager.py new file mode 100644 index 0000000..121ea1d --- /dev/null +++ b/data_chain/manager/dataset_manager.py @@ -0,0 +1,216 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from sqlalchemy import select, delete, update, func, between, asc, desc, and_ +from datetime import datetime, timezone +from typing import List, Dict +import uuid +from data_chain.entities.enum import DataSetStatus +from data_chain.entities.request_data import ( + ListDatasetRequest, + ListDataInDatasetRequest +) +from data_chain.stores.database.database import DataBase, DataSetEntity, DataSetDocEntity, QAEntity, TaskEntity +from data_chain.logger.logger import logger as logging + + +class DatasetManager: + @staticmethod + async def add_dataset(dataset_entity: DataSetEntity) -> DataSetEntity: + """添加数据集""" + try: + async with await DataBase.get_session() as session: + session.add(dataset_entity) + await session.commit() + await session.refresh(dataset_entity) + except Exception as e: + err = "添加数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e + return dataset_entity + + @staticmethod + async def add_datasets(dataset_entity_list: List[DataSetEntity]) -> List[DataSetEntity]: + """批量添加数据集""" + try: + async with await DataBase.get_session() as session: + session.add_all(dataset_entity_list) + await session.commit() + for dataset_entity in dataset_entity_list: + await session.refresh(dataset_entity) + except Exception as e: + err = "添加数据集失败" + logging.exception("[DatasetManager] %s", err) + return dataset_entity_list + + @staticmethod + async def add_dataset_docs(dataset_doc_entities: List[DataSetDocEntity]) -> List[DataSetDocEntity]: + """批量添加数据集文档""" + try: + async with await DataBase.get_session() as session: + session.add_all(dataset_doc_entities) + await session.commit() + for 
dataset_doc_entity in dataset_doc_entities: + await session.refresh(dataset_doc_entity) + except Exception as e: + err = "添加数据集文档失败" + logging.exception("[DatasetManager] %s", err) + raise e + return dataset_doc_entities + + @staticmethod + async def get_dataset_by_dataset_id(dataset_id: uuid.UUID) -> DataSetEntity: + """根据数据集ID查询数据集""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(DataSetEntity) + .where(DataSetEntity.id == dataset_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "根据数据集ID查询数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def list_dataset(req: ListDatasetRequest) -> tuple[int, List[DataSetEntity]]: + """列出数据集""" + try: + async with await DataBase.get_session() as session: + subq = (select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( + partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn')).subquery()) + stmt = ( + select(DataSetEntity) + .outerjoin(subq, and_(DataSetEntity.id == subq.c.op_id, subq.c.rn == 1)) + ) + stmt = stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) + if req.kb_id: + stmt = stmt.where(DataSetEntity.kb_id == req.kb_id) + if req.dataset_id: + stmt = stmt.where(DataSetEntity.id == req.dataset_id) + if req.dataset_name: + stmt = stmt.where(DataSetEntity.name.ilike(f"%{req.dataset_name}%")) + if req.llm_id: + stmt = stmt.where(DataSetEntity.llm_id == req.llm_id) + if req.is_data_cleared: + stmt = stmt.where(DataSetEntity.is_data_cleared == req.is_data_cleared) + if req.is_chunk_related: + stmt = stmt.where(DataSetEntity.is_chunk_related == req.is_chunk_related) + if req.generate_status: + stmt = stmt.where(subq.c.status.in_([status.value for status in req.generate_status])) + if req.score_order: + if req.score_order == "asc": + stmt = stmt.order_by(asc(DataSetEntity.score)) + else: + stmt = stmt.order_by(desc(DataSetEntity.score)) + if 
req.author_name: + stmt = stmt.where(DataSetEntity.author_name.ilike(f"%{req.author_name}%")) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + result = await session.execute(stmt) + dataset_entities = result.scalars().all() + return total, dataset_entities + except Exception as e: + err = "列出数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def list_datasets_by_dataset_ids(dataset_ids: List[uuid.UUID]) -> List[DataSetEntity]: + """根据数据集ID列表查询数据集""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(DataSetEntity) + .where(DataSetEntity.id.in_(dataset_ids)) + ) + stmt = stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) + stmt = stmt.order_by(DataSetEntity.id.desc()) + stmt = stmt.order_by(DataSetEntity.id) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "根据数据集ID列表查询数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def list_dataset_document_by_dataset_id(dataset_id: uuid.UUID) -> List[DataSetDocEntity]: + """根据数据集ID查询数据集文档""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(DataSetDocEntity) + .where(DataSetDocEntity.dataset_id == dataset_id) + ) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "根据数据集ID查询数据集文档失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def get_dataset_by_dataset_id(dataset_id: uuid.UUID) -> DataSetEntity: + """根据数据集ID查询数据集""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(DataSetEntity) + .where(DataSetEntity.id == dataset_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "根据数据集ID查询数据集失败" + 
logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def update_dataset_by_dataset_id(dataset_id: uuid.UUID, dataset_dict: Dict[str, str]) -> DataSetEntity: + """根据数据集ID更新数据集""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(DataSetEntity) + .where(DataSetEntity.id == dataset_id) + .values(**dataset_dict) + ) + await session.execute(stmt) + await session.commit() + stmt = ( + select(DataSetEntity) + .where(DataSetEntity.id == dataset_id) + ) + result = await session.execute(stmt) + dataset_entity = result.scalars().first() + return dataset_entity + except Exception as e: + err = "更新数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def update_dataset_by_dataset_ids(dataset_ids: List[uuid.UUID], dataset_dict: Dict[str, str]) -> None: + """根据数据集ID更新数据集""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(DataSetEntity) + .where(DataSetEntity.id.in_(dataset_ids)) + .values(**dataset_dict) + ) + result = await session.execute(stmt) + await session.commit() + stmt = ( + select(DataSetEntity) + .where(DataSetEntity.id.in_(dataset_ids)) + ) + result = await session.execute(stmt) + dataset_entities = result.scalars().all() + return dataset_entities + except Exception as e: + err = "更新数据集失败" + logging.exception("[DatasetManager] %s", err) + raise e diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index 31bb0b2..69b7998 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -1,297 +1,256 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
from sqlalchemy import select, delete, update, func, between, asc, desc, and_ from datetime import datetime, timezone import uuid -from data_chain.logger.logger import logger as logging from typing import Dict, List, Tuple -from data_chain.stores.postgres.postgres import PostgresDB, DocumentEntity, TaskEntity,TemporaryDocumentEntity -from data_chain.models.constant import DocumentEmbeddingConstant,TemporaryDocumentStatusEnum - - +from data_chain.config import config +from data_chain.entities.enum import TaskStatus, OrderType +from data_chain.stores.database.database import DataBase, KnowledgeBaseEntity, DocumentTypeEntity, DocumentEntity, TaskEntity +from data_chain.entities.enum import KnowledgeBaseStatus, DocumentStatus +from data_chain.manager.knowledge_manager import KnowledgeBaseManager +from data_chain.entities.enum import Tokenizer, ChunkStatus +from data_chain.entities.request_data import ListDocumentRequest +from data_chain.logger.logger import logger as logging class DocumentManager(): + """文档管理类""" @staticmethod - async def insert(entity: DocumentEntity) -> DocumentEntity: + async def add_document(document_entity: DocumentEntity) -> DocumentEntity: + """添加文档""" try: - async with await PostgresDB.get_session() as session: - session.add(entity) + async with await DataBase.get_session() as session: + session.add(document_entity) await session.commit() - await session.refresh(entity) - return entity + return document_entity except Exception as e: - logging.error(f"Failed to insert entity: {e}") - return None + err = "添加文档失败" + logging.exception("[DocumentManager] %s", err) @staticmethod - async def insert_bulk(entity_list: List[DocumentEntity]) -> List[DocumentEntity]: - try: - async with await PostgresDB.get_session() as session: - session.add_all(entity_list) - await session.commit() - # 可以选择刷新所有实体,但这可能不是必要的,取决于实际需求 - for entity in entity_list: - await session.refresh(entity) - return entity_list - except Exception as e: - logging.error(f"Failed to insert 
bulk entities: {e}") - return [] + async def add_documents(document_entities: List[DocumentEntity]) -> List[DocumentEntity]: + """批量添加文档""" + pass @staticmethod - async def select_by_id(id: uuid.UUID) -> DocumentEntity: + async def get_topk_document_by_kb_id_vector( + kb_id: uuid.UUID, vector: list[float], topk: int) -> List[DocumentEntity]: + """根据知识库ID和向量获取前K个文档""" try: - async with await PostgresDB.get_session() as session: - stmt = select(DocumentEntity).where(DocumentEntity.id == id) + async with await DataBase.get_session() as session: + stmt = ( + select(DocumentEntity) + .where(DocumentEntity.kb_id == kb_id) + .where(DocumentEntity.status != DocumentStatus.DELETED.value) + .where(DocumentEntity.enabled == True) + .where(DocumentEntity.abstract_vector.cosine_distance(vector).desc()) + .order_by(DocumentEntity.abstract_vector.cosine_distance(vector).desc()) + .limit(topk) + ) result = await session.execute(stmt) - return result.scalars().first() - except Exception as e: - logging.error(f"Failed to select entity by ID: {e}") - return None - - @staticmethod - async def select_by_ids(ids: List[uuid.UUID]) -> List[DocumentEntity]: - try: - async with await PostgresDB.get_session() as session: - stmt = select(DocumentEntity).where(DocumentEntity.id.in_(ids)) - results = await session.execute(stmt) - return results.scalars().all() + document_entities = result.scalars().all() + return document_entities except Exception as e: - logging.error(f"Failed to select entity by ID: {e}") - return None + err = "获取前K个文档失败" + logging.exception("[DocumentManager] %s", err) + raise e @staticmethod - async def select_by_knowledge_base_id(kb_id: uuid.UUID) -> List[DocumentEntity]: + async def get_topk_document_by_kb_id_keyword( + kb_id: uuid.UUID, query: str, topk: int) -> List[DocumentEntity]: + """根据知识库ID和关键词获取前K个文档""" try: - async with await PostgresDB.get_session() as session: # 确保 get_async_session 返回一个异步的session - stmt = select(DocumentEntity).where(DocumentEntity.kb_id == 
kb_id) + async with await DataBase.get_session() as session: + kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(kb_id) + tokenizer = '' + if kb_entity.tokenizer == Tokenizer.ZH.value: + if 'opengauss' in config['DATABASE_URL']: + tokenizer = 'chparser' + else: + tokenizer = 'zhparser' + elif kb_entity.tokenizer == Tokenizer.EN.value: + tokenizer = 'english' + stmt = ( + select(DocumentEntity) + .where(DocumentEntity.kb_id == kb_id) + .where(DocumentEntity.status != DocumentStatus.DELETED.value) + .where(DocumentEntity.enabled == True) + .where(func.ts_rank_cd( + func.to_tsvector(tokenizer, DocumentEntity.abstract), + func.plainto_tsquery(tokenizer, query) + ).desc()) + .limit(topk) + ) result = await session.execute(stmt) - return result.scalars().all() + document_entities = result.scalars().all() + return document_entities except Exception as e: - logging.error(f"Failed to select by knowledge base id: {e}") - return [] + err = "获取前K个文档失败" + logging.exception("[DocumentManager] %s", err) + raise e @staticmethod - async def select_by_knowledge_base_id_and_file_name(kb_id: str, file_name: str): + async def list_document(req: ListDocumentRequest) -> tuple[int, List[DocumentEntity]]: + """ + 列出文档 + :param req: 请求参数 + :return: 文档列表 + """ try: - async with await PostgresDB.get_session() as session: - stmt = select(DocumentEntity).where( - and_(DocumentEntity.kb_id == kb_id, - DocumentEntity.name == file_name)) - result = await session.execute(stmt) - return result.scalars().first() - except Exception as e: - logging.error(f"Failed to select by knowledge base id and file name: {e}") - return None + async with await DataBase.get_session() as session: + subq = (select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( + partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn')).subquery()) - @staticmethod - async def select_by_page(params: Dict, page_number: int, page_size: int) -> Tuple[int, List['DocumentEntity']]: - 
try: - async with await PostgresDB.get_session() as session: - # 子查询:找到每个文档最近的任务 - subq = ( - select( - TaskEntity.op_id, - TaskEntity.status, - func.row_number().over(partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn') - ).subquery() - ) - - # 主查询 stmt = ( select(DocumentEntity) .outerjoin(subq, and_(DocumentEntity.id == subq.c.op_id, subq.c.rn == 1)) ) - if 'kb_id' in params: - stmt = stmt.where(DocumentEntity.kb_id == params['kb_id']) - if 'id' in params: - stmt = stmt.where(DocumentEntity.id == params['id']) - if 'name' in params: - stmt = stmt.where(DocumentEntity.name.ilike(f"%{params['name']}%")) - if 'parser_method' in params: - stmt = stmt.where(DocumentEntity.parser_method.in_(params['parser_method'])) - if 'document_type_list' in params: - document_type_ids = params['document_type_list'] - stmt = stmt.where(DocumentEntity.type_id.in_(document_type_ids)) - if 'created_time_order' in params: - if params['created_time_order'] == 'desc': - stmt = stmt.order_by(desc(DocumentEntity.created_time)) - elif params['created_time_order'] == 'asc': - stmt = stmt.order_by(asc(DocumentEntity.created_time)) - stmt = stmt.order_by(desc(DocumentEntity.id)) - if 'created_time_start' in params and 'created_time_end' in params: - stmt = stmt.where(between(DocumentEntity.created_time, - datetime.strptime(params['created_time_start'], '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc), - datetime.strptime(params['created_time_end'], '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc))) - if 'status' in params: - stmt = stmt.where(subq.c.status.in_(params['status'])) - if 'enabled' in params: - stmt = stmt.where(DocumentEntity.enabled == params['enabled']) - # Execute the count part of the query separately + stmt = stmt.where(DocumentEntity.status != DocumentStatus.DELETED.value) + if req.kb_id: + stmt = stmt.where(DocumentEntity.kb_id == req.kb_id) + if req.doc_id: + stmt = stmt.where(DocumentEntity.id == req.doc_id) + if req.doc_name: + stmt = 
stmt.where(DocumentEntity.name.ilike(f"%{req.doc_name}%")) + if req.doc_type_id: + stmt = stmt.where(DocumentEntity.type_id == req.doc_type_id) + if req.parse_status: + stmt = stmt.where(subq.c.status.in_([status.value for status in req.parse_status])) + if req.parse_method: + stmt = stmt.where(DocumentEntity.parse_method == req.parse_method) + if req.author_name: + stmt = stmt.where(DocumentEntity.author_name.ilike(f"%{req.author_name}%")) + if req.created_time_start and req.created_time_end: + stmt = stmt.where( + between(DocumentEntity.created_time, + datetime.strptime(req.created_time_start, + '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc), + datetime.strptime(req.created_time_end, '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc)) + ) count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() - # Apply pagination - stmt = stmt.offset((page_number-1)*page_size).limit(page_size) - - # Execute the main query + stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + if req.created_time_order == OrderType.DESC: + stmt = stmt.order_by(DocumentEntity.created_time.desc()) + else: + stmt = stmt.order_by(DocumentEntity.created_time.asc()) + stmt = stmt.order_by(DocumentEntity.id.desc()) result = await session.execute(stmt) - document_list = result.scalars().all() - - return (total, document_list) + document_entities = result.scalars().all() + return (total, document_entities) except Exception as e: - logging.error(f"Failed to select documents by page: {e}") - return (0, []) + err = "获取文档列表失败" + logging.exception("[DocumentManager] %s", err) + raise e + @staticmethod - async def select_cnt_and_sz_by_kb_id(kb_id: uuid.UUID) -> Tuple[int, int]: + async def list_all_document_by_kb_id(kb_id: uuid.UUID) -> List[DocumentEntity]: + """根据知识库ID获取文档列表""" try: - async with await PostgresDB.get_session() as session: - # 构造查询语句 - stmt = ( - select( - func.count(DocumentEntity.id).label('total_cnt'), - 
func.sum(DocumentEntity.size).label('total_sz') - ) - .where(DocumentEntity.kb_id == kb_id) - ) - - # 执行查询 + async with await DataBase.get_session() as session: + stmt = select(DocumentEntity).where( + and_(DocumentEntity.kb_id == kb_id, + DocumentEntity.status != DocumentStatus.DELETED.value)) result = await session.execute(stmt) - - # 获取结果 - first_row = result.first() - - # 如果没有结果,返回 (0, 0) - if first_row is None: - return 0, 0 - - total_cnt, total_sz = first_row - return int(total_cnt) if total_cnt is not None else 0, int(total_sz) if total_sz is not None else 0 + document_entities = result.scalars().all() + return document_entities except Exception as e: - logging.error(f"Failed to select count and size by knowledge base id: {e}") - return 0, 0 + err = "获取所有文档列表失败" + logging.exception("[DocumentManager] %s", err) + raise e @staticmethod - async def update(id: str, update_dict: dict): + async def get_document_by_doc_id(doc_id: uuid.UUID) -> DocumentEntity: + """根据文档ID获取文档""" try: - async with await PostgresDB.get_session() as session: - # 使用异步查询 - result = await session.execute( - select(DocumentEntity).where(DocumentEntity.id == id).with_for_update() - ) + async with await DataBase.get_session() as session: + stmt = select(DocumentEntity).where( + and_(DocumentEntity.id == doc_id, + DocumentEntity.status != DocumentStatus.DELETED.value)) + result = await session.execute(stmt) document_entity = result.scalars().first() - if 'status' in update_dict.keys() and update_dict['status'] != DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING: - if document_entity.status != DocumentEmbeddingConstant.DOCUMENT_EMBEDDING_STATUS_PENDING: - return None - # 更新记录 - await session.execute( - update(DocumentEntity).where(DocumentEntity.id == id).values(**update_dict) - ) - - await session.commit() return document_entity except Exception as e: - logging.error(f"Failed to update document: {e}") - return False - - @staticmethod - async def delete_by_id(id: uuid.UUID) -> int: 
- async with await PostgresDB.get_session() as session: - stmt = delete(DocumentEntity).where(DocumentEntity.id == id) - result = await session.execute(stmt) - await session.commit() - return result.rowcount - - @staticmethod - async def delete_by_ids(ids: List[uuid.UUID]) -> int: - async with await PostgresDB.get_session() as session: - stmt = delete(DocumentEntity).where(DocumentEntity.id.in_(ids)) - result = await session.execute(stmt) - await session.commit() - return result.rowcount - - @staticmethod - async def delete_by_knowledge_base_id(kb_id: uuid.UUID) -> int: - async with await PostgresDB.get_session() as session: - stmt = delete(DocumentEntity).where(DocumentEntity.kb_id == kb_id) - result = await session.execute(stmt) - await session.commit() - return result.rowcount + err = "获取文档失败" + logging.exception("[DocumentManager] %s", err) + raise e -class TemporaryDocumentManager(): @staticmethod - async def insert(entity: TemporaryDocumentEntity) -> TemporaryDocumentEntity: + async def update_doc_type_by_kb_id( + kb_id: uuid.UUID, old_doc_type_ids: list[uuid.UUID], + new_doc_type_id: uuid.UUID) -> None: + """根据知识库ID更新文档类型""" try: - async with await PostgresDB.get_session() as session: - session.add(entity) + async with await DataBase.get_session() as session: + stmt = update(DocumentEntity).where( + and_(DocumentEntity.kb_id == kb_id, + DocumentEntity.status != DocumentStatus.DELETED.value, + DocumentEntity.type_id.in_(old_doc_type_ids)) + ).values(type_id=new_doc_type_id) + await session.execute(stmt) await session.commit() - await session.refresh(entity) - return entity except Exception as e: - logging.error(f"Failed to insert temporary document: {e}") - return None + err = "更新文档类型失败" + logging.exception("[DocumentManager] %s", err) + raise e + @staticmethod - async def delete_by_ids(ids: List[uuid.UUID]) -> int: + async def update_document_by_doc_id(doc_id: uuid.UUID, doc_dict: Dict[str, str]) -> DocumentEntity: + """根据文档ID更新文档""" try: - async with await 
PostgresDB.get_session() as session: - stmt = delete(TemporaryDocumentEntity).where(TemporaryDocumentEntity.id.in_(ids)) - result = await session.execute(stmt) + async with await DataBase.get_session() as session: + stmt = update(DocumentEntity).where( + and_(DocumentEntity.id == doc_id, + DocumentEntity.status != DocumentStatus.DELETED.value) + ).values(**doc_dict) + await session.execute(stmt) await session.commit() - return result.rowcount + return await DocumentManager.get_document_by_doc_id(doc_id) except Exception as e: - logging.error(f"Failed to delete temporary documents: {e}") - return 0 - @staticmethod - async def select_by_ids(ids: List[uuid.UUID]) -> List[TemporaryDocumentEntity]: - try: - async with await PostgresDB.get_session() as session: - stmt = select(TemporaryDocumentEntity).where( - and_( - TemporaryDocumentEntity.id.in_(ids), - TemporaryDocumentEntity.status !=TemporaryDocumentStatusEnum.DELETED - ) - ) - result = await session.execute(stmt) - tmp_list = result.scalars().all() - return tmp_list - except Exception as e: - logging.error(f"Failed to select temporary documents by page: {e}") - return [] + err = "更新文档失败" + logging.exception("[DocumentManager] %s", err) + raise e + @staticmethod - async def select_by_id(id: uuid.UUID) -> TemporaryDocumentEntity: + async def update_document_by_doc_ids(doc_ids: list[uuid.UUID], doc_dict: Dict[str, str]) -> list[DocumentEntity]: + """根据文档ID批量更新文档""" try: - async with await PostgresDB.get_session() as session: - stmt = select(TemporaryDocumentEntity).where(TemporaryDocumentEntity.id==id) + async with await DataBase.get_session() as session: + stmt = update(DocumentEntity).where( + and_(DocumentEntity.id.in_(doc_ids), + DocumentEntity.status != DocumentStatus.DELETED.value) + ).values(**doc_dict) result = await session.execute(stmt) - return result.scalars().first() - except Exception as e: - logging.error(f"Failed to select temporary document by id: {e}") - return None - @staticmethod - async def 
update(id: uuid.UUID, update_dict: dict): - try: - async with await PostgresDB.get_session() as session: - await session.execute( - update(TemporaryDocumentEntity).where(TemporaryDocumentEntity.id == id).values(**update_dict) - ) await session.commit() - entity=await session.execute(select(TemporaryDocumentEntity).where( - TemporaryDocumentEntity.id==id, - ) + stmt = select(DocumentEntity).where( + DocumentEntity.id.in_(doc_ids) ) - return entity + document_entities = await session.execute(stmt) + document_entities = result.scalars().all() + return document_entities except Exception as e: - logging.error(f"Failed to update temporary document by id: {e}") - return None + err = "批量更新文档失败" + logging.exception("[DocumentManager] %s", err) + raise e + @staticmethod - async def update_all(ids: List[uuid.UUID], update_dict: dict): - try: - async with await PostgresDB.get_session() as session: - await session.execute( - update(TemporaryDocumentEntity).where(TemporaryDocumentEntity.id.in_(ids)).values(**update_dict) - ) - await session.commit() - return True - except Exception as e: - logging.error(f"Failed to update temporary document by ids: {e}") - return False \ No newline at end of file + async def delte_document_by_doc_id(doc_id: uuid.UUID) -> None: + """根据文档ID删除文档""" + pass + + @staticmethod + async def delete_document_by_kb_id(kb_id: uuid.UUID) -> None: + """根据知识库ID删除文档""" + pass + + @staticmethod + async def delete_document_by_doc_id(doc_id: uuid.UUID) -> None: + """根据文档ID删除文档""" + pass + + @staticmethod + async def delete_document_by_doc_ids(doc_ids: list[uuid.UUID]) -> None: + """根据文档ID批量删除文档""" + pass diff --git a/data_chain/manager/document_type_manager.py b/data_chain/manager/document_type_manager.py index 03e988a..ca422ea 100644 --- a/data_chain/manager/document_type_manager.py +++ b/data_chain/manager/document_type_manager.py @@ -1,114 +1,70 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from sqlalchemy import select, delete, update -from typing import List +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from sqlalchemy import select, delete, update, func, between, asc, desc, and_ +from datetime import datetime, timezone import uuid from data_chain.logger.logger import logger as logging +from typing import Dict, List, Tuple -from data_chain.models.service import DocumentTypeDTO -from data_chain.stores.postgres.postgres import PostgresDB, DocumentEntity, DocumentTypeEntity - - +from data_chain.stores.database.database import DataBase, KnowledgeBaseEntity, DocumentTypeEntity, DocumentEntity +from data_chain.entities.enum import KnowledgeBaseStatus, DocumentStatus class DocumentTypeManager(): @staticmethod - async def select_by_id(id: str) -> DocumentTypeEntity: - async with await PostgresDB.get_session()as session: - stmt = select(DocumentTypeEntity).where(DocumentTypeEntity.id == id) - result = await session.execute(stmt) - return result.scalars().first() - @staticmethod - async def select_by_ids(ids: List[str]) -> List[DocumentTypeEntity]: - async with await PostgresDB.get_session()as session: - stmt = select(DocumentTypeEntity).where(DocumentTypeEntity.id.in_(ids)) - result = await session.execute(stmt) - return result.scalars().all() - @staticmethod - async def select_by_knowledge_base_id(kb_id: str) -> List[DocumentTypeEntity]: - async with await PostgresDB.get_session()as session: - stmt = select(DocumentTypeEntity).where(DocumentTypeEntity.kb_id == kb_id) - result = await session.execute(stmt) - return result.scalars().all() + async def add_document_type(document_type_entity: DocumentTypeEntity) -> DocumentTypeEntity: + """添加文档类型""" + try: + async with await DataBase.get_session() as session: + session.add(document_type_entity) + await session.commit() + return document_type_entity + except Exception as e: + err = "添加文档类型失败" + logging.exception("[DocumentTypeManager] %s", err) @staticmethod - async def 
insert_bulk(kb_id: uuid.UUID, types: List[str]) -> List[DocumentTypeEntity]: - if types is None or len(types) == 0: - return [] - async with await PostgresDB.get_session()as session: - document_type_entity_list = [ - DocumentTypeEntity(kb_id=kb_id, type=type) for type in types - ] - session.add_all(document_type_entity_list) - await session.commit() - # Refresh the entities after committing so that they have their primary keys filled in. - for entity in document_type_entity_list: - await session.refresh(entity) - return document_type_entity_list + async def add_document_types( + document_type_entities: List[DocumentTypeEntity]) -> List[DocumentTypeEntity]: + """批量添加文档类型""" + try: + async with await DataBase.get_session() as session: + session.add_all(document_type_entities) + await session.commit() + return document_type_entities + except Exception as e: + err = "批量添加文档类型失败" + logging.exception("[DocumentTypeManager] %s", err) @staticmethod - async def update_knowledge_base_document_type(kb_id: str, types: List['DocumentTypeDTO'] = None): + async def get_document_type_by_id( + doc_type_id: uuid.UUID) -> DocumentTypeEntity: + """根据文档类型ID查询文档类型""" try: - async with await PostgresDB.get_session()as session: - if types is not None: - new_document_type_map = {str(_type.id): _type.type for _type in types} - new_document_type_ids = {_type.id for _type in types} - old_document_type_ids = set((await session.execute( - select(DocumentTypeEntity.id).filter(DocumentTypeEntity.kb_id == kb_id))).scalars().all()) - delete_document_type_ids = old_document_type_ids - new_document_type_ids - add_document_type_ids = new_document_type_ids - old_document_type_ids - update_document_type_ids = old_document_type_ids & new_document_type_ids - - # 删掉document_type, 然后document对应的type_id修改为默认值 - if len(delete_document_type_ids) > 0: - default_document_type_id = uuid.UUID('00000000-0000-0000-0000-000000000000') - await session.execute( - 
delete(DocumentTypeEntity).where(DocumentTypeEntity.id.in_(delete_document_type_ids))) - await session.execute( - update(DocumentEntity).where(DocumentEntity.type_id.in_(delete_document_type_ids)).values(type_id=default_document_type_id)) - await session.commit() - - # 插入document_type - if len(add_document_type_ids) > 0: - add_document_type_entity_list = [ - DocumentTypeEntity( - id=add_document_type_id, kb_id=kb_id, type=new_document_type_map[str(add_document_type_id)]) - for add_document_type_id in add_document_type_ids - ] - session.add_all(add_document_type_entity_list) - await session.commit() - - # 修改document_type - if len(update_document_type_ids) > 0: - old_document_type_entity_list=( - await session.execute( - select(DocumentTypeEntity).filter(DocumentTypeEntity.id.in_(update_document_type_ids))) - ).scalars().all() - for old_document_type_entity in old_document_type_entity_list: - new_type = new_document_type_map.get(str(old_document_type_entity.id),None) - if old_document_type_entity.type != new_type: - await session.execute( - update(DocumentTypeEntity).where(DocumentTypeEntity.id == old_document_type_entity.id).values(type=new_type)) - await session.commit() - - results = await session.execute(select(DocumentTypeEntity).filter(DocumentTypeEntity.kb_id == kb_id)) - return results.scalars().all() + async with await DataBase.get_session() as session: + stmt = ( + select(DocumentTypeEntity) + .where(DocumentTypeEntity.id == doc_type_id) + ) + result = await session.execute(stmt) + return result.scalars().one_or_none() except Exception as e: - logging.error(f"Update document type faile by knowledge base id failed due to: {e}") - return [] + err = "查询文档类型失败" + logging.exception("[DocumentTypeManager] %s", err) + raise e @staticmethod - async def delete_by_knowledge_base_id(kb_id: str) -> int: + async def update_doc_type_by_doc_type_id( + doc_type_id: uuid.UUID, doc_type_name: str) -> None: + """根据文档类型ID更新文档类型名称""" try: - async with await 
PostgresDB.get_session() as session: - # 构建删除语句 - stmt = delete(DocumentTypeEntity).where(DocumentTypeEntity.kb_id == kb_id) - # 执行删除操作 - result = await session.execute(stmt) - # 提交事务 + async with await DataBase.get_session() as session: + stmt = update(DocumentTypeEntity).where( + DocumentTypeEntity.id == doc_type_id + ).values(name=doc_type_name) + await session.execute(stmt) await session.commit() - # 返回删除的数量 - return result.rowcount except Exception as e: - logging.error(f"Failed to delete by knowledge base id: {e}") - return 0 + err = "更新文档类型名称失败" + logging.exception("[DocumentTypeManager] %s", err) + raise e diff --git a/data_chain/manager/image_manager.py b/data_chain/manager/image_manager.py index ed091aa..6138b4f 100644 --- a/data_chain/manager/image_manager.py +++ b/data_chain/manager/image_manager.py @@ -1,116 +1,41 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from sqlalchemy import select -from typing import List +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from sqlalchemy import select, update +from typing import List, Dict import uuid from data_chain.logger.logger import logger as logging -from data_chain.stores.postgres.postgres import PostgresDB, ImageEntity - - +from data_chain.stores.database.database import DataBase, ImageEntity class ImageManager: - - @staticmethod - async def add_image(image_entity: ImageEntity): - try: - async with await PostgresDB.get_session() as session: - session.add(image_entity) - await session.commit() - await session.refresh(image_entity) - except Exception as e: - logging.error(f"Add image failed due to error: {e}") - return None - return image_entity + """图片管理类""" @staticmethod - async def add_images(image_entity_list: List[ImageEntity]): + async def add_images(image_entity_list: List[ImageEntity]) -> List[ImageEntity]: try: - async with await PostgresDB.get_session() as session: + async with await DataBase.get_session() as session: session.add_all(image_entity_list) await session.commit() for image_entity in image_entity_list: await session.refresh(image_entity) except Exception as e: - logging.error(f"Add image failed due to error: {e}") - return None - return image_entity_list - @staticmethod - async def del_image_by_image_id(image_id: uuid.UUID): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取并删除用户 - stmt = select(ImageEntity).where(ImageEntity.id == image_id) - result = await session.execute(stmt) - image_to_delete = result.scalars().first() - - if image_to_delete is not None: - await session.delete(image_to_delete) - await session.commit() - except Exception as e: - logging.error(f"Delete image failed due to error: {e}") - return False - return True - - @staticmethod - async def query_image_by_image_id(image_id: uuid.UUID): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取并删除用户 - stmt = select(ImageEntity).where(ImageEntity.id == image_id) - result = await session.execute(stmt) - image_entity = 
result.scalars().first() - except Exception as e: - logging.error(f"Query image by image id failed due to error: {e}") - return None - return image_entity - - @staticmethod - async def query_image_by_doc_id(doc_id: uuid.UUID): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取并删除用户 - stmt = select(ImageEntity).where(ImageEntity.document_id == doc_id) - result = await session.execute(stmt) - image_entity_list = result.scalars().all() - except Exception as e: - logging.error(f"Query image by doc id failed due to error: {e}") - return [] - return image_entity_list - - @staticmethod - async def query_image_by_user_id(user_id: uuid.UUID): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取并删除用户 - stmt = select(ImageEntity).where(ImageEntity.user_id == user_id) - result = await session.execute(stmt) - image_entity_list = result.scalars().all() - except Exception as e: - logging.error(f"Query image by user id failed due to error: {e}") - return [] + err = "添加图片失败" + logging.exception("[ImageManager] %s", err) + raise e return image_entity_list @staticmethod - async def update_image_by_image_id(image_id, tmp_dict: dict): + async def update_images_by_doc_id(doc_id: uuid.UUID, image_dict: Dict[str, str]) -> None: + """根据文档ID更新图片""" try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取用户 - stmt = select(ImageEntity).where(ImageEntity.id == image_id) - result = await session.execute(stmt) - image_to_update = result.scalars().first() - - if image_to_update is None: - raise ValueError(f"No image found with id {image_id}") - - # 更新用户属性 - for key, value in tmp_dict.items(): - if hasattr(image_to_update, key): - setattr(image_to_update, key, value) - else: - logging.error(f"Attribute {key} does not exist on User model") - + async with await DataBase.get_session() as session: + stmt = ( + update(ImageEntity) + .where(ImageEntity.doc_id == doc_id) + .values(**image_dict) + ) + await 
session.execute(stmt) await session.commit() - return True except Exception as e: - logging.error(f"Failed to update user: {e}") - return False + err = "更新图片失败" + logging.exception("[ImageManager] %s", err) + raise e diff --git a/data_chain/manager/knowledge_manager.py b/data_chain/manager/knowledge_manager.py index 54d172b..e1e9e7a 100644 --- a/data_chain/manager/knowledge_manager.py +++ b/data_chain/manager/knowledge_manager.py @@ -1,151 +1,136 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. from typing import Dict, List, Tuple, Optional import uuid from data_chain.logger.logger import logger as logging -from sqlalchemy import and_, select, delete, func,between -from datetime import datetime,timezone -from data_chain.stores.postgres.postgres import PostgresDB, KnowledgeBaseEntity -from data_chain.models.constant import KnowledgeStatusEnum -from data_chain.models.constant import embedding_model_out_dimensions - +from sqlalchemy import and_, select, delete, update, func, between +from datetime import datetime, timezone +from data_chain.entities.request_data import ListKnowledgeBaseRequest +from data_chain.stores.database.database import DataBase, KnowledgeBaseEntity, DocumentTypeEntity, DocumentEntity +from data_chain.entities.enum import KnowledgeBaseStatus, DocumentStatus class KnowledgeBaseManager(): - + """知识库管理类""" @staticmethod - async def insert(entity: KnowledgeBaseEntity) -> Optional[KnowledgeBaseEntity]: + async def add_knowledge_base(knowledge_base_entity: KnowledgeBaseEntity) -> KnowledgeBaseEntity: + """添加知识库""" try: - async with await PostgresDB.get_session() as session: - vector_items_table = await PostgresDB.get_dynamic_vector_items_table( - str(entity.vector_items_id), - embedding_model_out_dimensions[entity.embedding_model] - ) - await PostgresDB.create_table(vector_items_table) - session.add(entity) + async with await 
DataBase.get_session() as session: + session.add(knowledge_base_entity) await session.commit() - await session.refresh(entity) # Refresh the entity to get any auto-generated values. - return entity + return knowledge_base_entity except Exception as e: - import traceback - logging.error(f"Failed to insert entity: {traceback.format_exc()}") - logging.error(f"Failed to insert entity: {e}") - return None + err = "添加知识库失败" + logging.exception("[KnowledgeBaseManager] %s", err) @staticmethod - async def update(id: uuid.UUID, update_dict: Dict) -> Optional[KnowledgeBaseEntity]: + async def get_knowledge_base_by_kb_id(kb_id: uuid.UUID) -> KnowledgeBaseEntity: + """根据知识库ID获取知识库""" try: - async with await PostgresDB.get_session() as session: - stmt = select(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == id).with_for_update() + async with await DataBase.get_session() as session: + stmt = select(KnowledgeBaseEntity).where(and_(KnowledgeBaseEntity.id == kb_id, + KnowledgeBaseEntity.status != KnowledgeBaseStatus.DEELTED)) result = await session.execute(stmt) - entity = result.scalars().first() - - if 'status' in update_dict.keys() and update_dict['status'] != KnowledgeStatusEnum.IDLE: - if entity.status != KnowledgeStatusEnum.IDLE: - return None - if entity is not None: - for key, value in update_dict.items(): - setattr(entity, key, value) - await session.commit() - await session.refresh(entity) # Refresh the entity to ensure it's up to date. 
- return entity + knowledge_base_entity = result.scalars().first() + return knowledge_base_entity except Exception as e: - logging.error(f"Failed to update entity: {e}") - return None + err = "获取知识库失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e @staticmethod - async def select_by_id(id: uuid.UUID) -> KnowledgeBaseEntity: - async with await PostgresDB.get_session() as session: # 假设get_async_session返回一个异步会话 - stmt = select(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == id) - result = await session.execute(stmt) - entity = result.scalars().first() - return entity - return None + async def list_knowledge_base(req: ListKnowledgeBaseRequest) -> Tuple[int, List[KnowledgeBaseEntity]]: + """列出知识库""" + try: + async with await DataBase.get_session() as session: + stmt = select(KnowledgeBaseEntity).where( + KnowledgeBaseEntity.status != KnowledgeBaseStatus.DELETED.value) + if req.team_id: + stmt = stmt.where(KnowledgeBaseEntity.team_id == req.team_id) + if req.kb_id: + stmt = stmt.where(KnowledgeBaseEntity.id == req.kb_id) + if req.kb_name: + stmt = stmt.where(KnowledgeBaseEntity.name.like(f"%{req.kb_name}%")) + if req.author_name: + stmt = stmt.where(KnowledgeBaseEntity.author_name.like(f"%{req.author_name}%")) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.limit(req.page_size).offset((req.page - 1) * req.page_size) + stmt = stmt.order_by(KnowledgeBaseEntity.created_time.desc()) + stmt = stmt.order_by(KnowledgeBaseEntity.id.desc()) + result = await session.execute(stmt) + knowledge_base_entities = result.scalars().all() + return (total, knowledge_base_entities) + except Exception as e: + err = "列出知识库失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e @staticmethod - async def select_by_user_id_and_kb_name(user_id: uuid.UUID, kb_name: str) -> KnowledgeBaseEntity: + async def list_knowledge_base_by_team_ids( + team_ids: List[uuid.UUID], + kb_name: 
str = None) -> List[KnowledgeBaseEntity]: + """根据团队ID获取知识库""" try: - async with await PostgresDB.get_session() as session: + async with await DataBase.get_session() as session: stmt = select(KnowledgeBaseEntity).where( - and_(KnowledgeBaseEntity.user_id == user_id, KnowledgeBaseEntity.name == kb_name)) + and_(KnowledgeBaseEntity.team_id.in_(team_ids), + KnowledgeBaseEntity.status != KnowledgeBaseStatus.DELETED.value)) + if kb_name: + stmt = stmt.where(KnowledgeBaseEntity.name.like(f"%{kb_name}%")) result = await session.execute(stmt) - entity = result.scalars().first() - return entity + knowledge_base_entities = result.scalars().all() + return knowledge_base_entities except Exception as e: - logging.error(f"Failed to select by user id and kb name: {e}") - return None + err = "获取知识库失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e @staticmethod - async def select_by_page(params: Dict, page_number: int, page_size: int) -> Tuple[int, List[KnowledgeBaseEntity]]: + async def list_doc_types_by_kb_id(kb_id: uuid.UUID) -> List[DocumentTypeEntity]: + """列出知识库文档类型""" try: - async with await PostgresDB.get_session() as session: - base_query = select(KnowledgeBaseEntity).where( - KnowledgeBaseEntity.status != KnowledgeStatusEnum.IMPORTING) - # 构建过滤条件 - filters = [] - if 'id' in params.keys(): - filters.append(KnowledgeBaseEntity.id == params['id']) - if 'user_id' in params.keys(): - filters.append(KnowledgeBaseEntity.user_id == params['user_id']) - if 'name' in params.keys(): - filters.append(KnowledgeBaseEntity.name.ilike(f"%{params['name']}%")) - if 'created_time_start' in params and 'created_time_end' in params: - filters.append(between(KnowledgeBaseEntity.created_time, - datetime.strptime(params['created_time_start'], '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc), - datetime.strptime(params['created_time_end'], '%Y-%m-%d %H:%M').replace(tzinfo=timezone.utc))) - # 应用过滤条件 - query = base_query.where(*filters) - - # 排序 - if 'created_time_order' in 
params.keys(): - if params['created_time_order'] == 'desc': - query = query.order_by(KnowledgeBaseEntity.created_time.desc()) - elif params['created_time_order'] == 'asc': - query = query.order_by(KnowledgeBaseEntity.created_time.asc()) - if 'document_count_order' in params.keys(): - if params['document_count_order'] == 'desc': - query = query.order_by(KnowledgeBaseEntity.document_number.desc()) - elif params['document_count_order'] == 'asc': - query = query.order_by(KnowledgeBaseEntity.document_number.asc()) - - # 获取总数 - count_query = select(func.count()).select_from(query.subquery()) - total = await session.scalar(count_query) - # 分页查询 - paginated_query = query.offset((page_number - 1) * page_size).limit(page_size) - results = await session.scalars(paginated_query) - knowledge_base_entity_list = results.all() - - return (total, knowledge_base_entity_list) + async with await DataBase.get_session() as session: + stmt = select(DocumentTypeEntity).where(DocumentTypeEntity.kb_id == kb_id) + result = await session.execute(stmt) + document_type_entities = result.scalars().all() + return document_type_entities except Exception as e: - logging.error(f"Failed to select by page: {e}") - return (0, []) + err = "列出知识库文档类型失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e @staticmethod - async def delete(kb_id: str) -> int: + async def update_knowledge_base_by_kb_id(kb_id: uuid.UUID, kb_dict: Dict[str, str]) -> KnowledgeBaseEntity: + """根据知识库ID更新知识库""" try: - async with await PostgresDB.get_session() as session: + async with await DataBase.get_session() as session: + stmt = update(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id).values(**kb_dict) + await session.execute(stmt) + await session.commit() stmt = select(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id) result = await session.execute(stmt) knowledge_base_entity = result.scalars().first() + return knowledge_base_entity + except Exception as e: + err = "更新知识库失败" + 
logging.exception("[KnowledgeBaseManager] %s", err) + raise e - if knowledge_base_entity: - try: - vector_items_id = str(knowledge_base_entity.vector_items_id) - vector_dim = embedding_model_out_dimensions[knowledge_base_entity.embedding_model] - vector_items_table = await PostgresDB.get_dynamic_vector_items_table( - vector_items_id, - vector_dim - ) - await PostgresDB.drop_table(vector_items_table) - except Exception as e: - logging.error(f"Failed to delete vector items table: {e}") - delete_stmt = delete(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id) - result = await session.execute(delete_stmt) - cnt = result.rowcount - await session.commit() - return cnt - else: - return 0 + @staticmethod + async def update_doc_cnt_and_doc_size(kb_id: uuid.UUID) -> None: + """根据知识库ID更新知识库文档数量和文档大小,获取document表内状态不是deleted的文档数量和大小""" + try: + async with await DataBase.get_session() as session: + stmt = select(func.count(DocumentEntity.id), func.sum(DocumentEntity.size)).where( + and_(DocumentEntity.kb_id == kb_id, DocumentEntity.status != DocumentStatus.DELETED.value)) + result = await session.execute(stmt) + doc_cnt, doc_size = result.first() + stmt = update(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id).values( + document_count=doc_cnt, document_size=doc_size) + await session.execute(stmt) + await session.commit() except Exception as e: - logging.error(f"Failed to delete knowledge base entity: {e}") - return 0 + err = "更新知识库文档数量和大小失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e diff --git a/data_chain/manager/qa_manager.py b/data_chain/manager/qa_manager.py new file mode 100644 index 0000000..f743eb1 --- /dev/null +++ b/data_chain/manager/qa_manager.py @@ -0,0 +1,158 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from sqlalchemy import select, update, func +from typing import List, Dict +import uuid +from data_chain.entities.enum import QAStatus +from data_chain.entities.request_data import ( + ListDataInDatasetRequest +) +from data_chain.stores.database.database import DataBase, QAEntity +from data_chain.logger.logger import logger as logging + + +class QAManager: + """问答管理类""" + @staticmethod + async def add_qa(qa_entity: QAEntity) -> QAEntity: + """添加问答""" + try: + async with await DataBase.get_session() as session: + session.add(qa_entity) + await session.commit() + await session.refresh(qa_entity) + except Exception as e: + err = "添加问答失败" + logging.exception("[QAManager] %s", err) + raise e + return qa_entity + + @staticmethod + async def add_qas(qa_entity_entities: List[QAEntity]) -> List[QAEntity]: + """批量添加问答""" + try: + async with await DataBase.get_session() as session: + session.add_all(qa_entity_entities) + await session.commit() + for qa_entity in qa_entity_entities: + await session.refresh(qa_entity) + except Exception as e: + err = "批量添加问答失败" + logging.exception("[QAManager] %s", err) + raise e + return qa_entity_entities + + @staticmethod + async def get_data_by_data_id(data_id: uuid.UUID) -> QAEntity: + """根据数据ID查询问答""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(QAEntity) + .where(QAEntity.id == data_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "查询问答失败" + logging.exception("[QAManager] %s", err) + raise e + + @staticmethod + async def list_all_qa_by_dataset_id(dataset_id: uuid.UUID) -> List[QAEntity]: + """根据数据集ID查询问答""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(QAEntity) + .where(QAEntity.dataset_id == dataset_id) + ) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "查询问答失败" + logging.exception("[QAManager] %s", err) + raise e + + @staticmethod + 
@staticmethod + async def list_data_in_dataset(req: ListDataInDatasetRequest) -> tuple[int, List[QAEntity]]: + """根据数据集ID列出数据""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(QAEntity) + .where(QAEntity.status != QAStatus.DELETED.value) + ) + if req.dataset_id: + stmt = stmt.where(QAEntity.dataset_id == req.dataset_id) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + result = await session.execute(stmt) + qa_entities = result.scalars().all() + return total, qa_entities + except Exception as e: + err = "根据数据集ID列出数据失败" + logging.exception("[DatasetManager] %s", err) + raise e + + @staticmethod + async def update_qa_by_qa_id(qa_id: uuid.UUID, qa_dict: Dict[str, str]) -> QAEntity: + """根据问答ID更新问答""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(QAEntity) + .where(QAEntity.id == qa_id) + .values(**qa_dict) + ).returning(QAEntity) + result = await session.execute(stmt) + await session.commit() + qa_entity = result.scalars().first() + return qa_entity + except Exception as e: + err = "更新问答失败" + logging.exception("[QAManager] %s", err) + raise e + + @staticmethod + async def update_qa_by_dataset_id(dataset_id: uuid.UUID, qa_dict: Dict[str, str]) -> None: + """根据数据集ID更新问答""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(QAEntity) + .where(QAEntity.dataset_id == dataset_id) + .values(**qa_dict) + ) + await session.execute(stmt) + await session.commit() + except Exception as e: + err = "更新问答失败" + logging.exception("[QAManager] %s", err) + raise e + + @staticmethod + async def update_qa_by_qa_ids( + qa_ids: List[uuid.UUID], qa_dict: Dict[str, str]) -> None: + """根据问答ID列表更新问答""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(QAEntity) + .where(QAEntity.id.in_(qa_ids)) + .values(**qa_dict) + ) + 
result = await session.execute(stmt) + await session.commit() + stmt = ( + select(QAEntity) + .where(QAEntity.id.in_(qa_ids)) + ) + result = await session.execute(stmt) + qa_entities = result.scalars().all() + return qa_entities + except Exception as e: + err = "更新问答失败" + logging.exception("[QAManager] %s", err) + raise e diff --git a/data_chain/manager/role_manager.py b/data_chain/manager/role_manager.py new file mode 100644 index 0000000..4b97da7 --- /dev/null +++ b/data_chain/manager/role_manager.py @@ -0,0 +1,91 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from sqlalchemy import select, delete, and_ +from typing import Dict +import uuid + +from data_chain.logger.logger import logger as logging +from data_chain.entities.request_data import ListTeamRequest +from data_chain.entities.enum import TeamStatus +from data_chain.stores.database.database import DataBase, RoleEntity, ActionEntity, RoleActionEntity, UserRoleEntity + + +class RoleManager: + @staticmethod + async def add_role(role_entity: RoleEntity) -> RoleEntity: + """添加角色""" + try: + async with await DataBase.get_session() as session: + session.add(role_entity) + await session.commit() + await session.refresh(role_entity) + except Exception as e: + err = "添加角色失败" + logging.exception("[RoleManager] %s", err) + raise e + return role_entity + + @staticmethod + async def add_user_role(user_role_entity: UserRoleEntity) -> UserRoleEntity: + """添加用户角色""" + try: + async with await DataBase.get_session() as session: + session.add(user_role_entity) + await session.commit() + await session.refresh(user_role_entity) + except Exception as e: + err = "添加用户角色失败" + logging.exception("[RoleManager] %s", err) + raise e + return user_role_entity + + @staticmethod + async def add_action(action_entity: ActionEntity) -> ActionEntity: + """添加操作""" + try: + async with await DataBase.get_session() as session: + session.add(action_entity) + await session.commit() + await 
session.refresh(action_entity) + return True + except Exception as e: + err = "添加操作失败" + logging.warning("[RoleManager] %s", err) + return False + + @staticmethod + async def add_role_actions(role_action_entities: list[RoleActionEntity]) -> bool: + """添加角色操作""" + try: + async with await DataBase.get_session() as session: + for role_action_entity in role_action_entities: + session.add(role_action_entity) + await session.commit() + for role_action_entity in role_action_entities: + await session.refresh(role_action_entity) + except Exception as e: + err = "添加角色操作失败" + logging.exception("[RoleManager] %s", err) + raise e + + @staticmethod + async def get_action_by_team_id_user_sub_and_action( + user_sub: str, team_id: uuid.UUID, action: str) -> ActionEntity: + """根据团队ID、用户ID和操作获取操作""" + try: + async with await DataBase.get_session() as session: + stmt = select(ActionEntity).join( + RoleActionEntity, ActionEntity.action == RoleActionEntity.action).join( + UserRoleEntity, RoleActionEntity.role_id == UserRoleEntity.role_id).where( + and_( + UserRoleEntity.user_id == user_sub, + UserRoleEntity.team_id == team_id, + ActionEntity.action == action, + ) + ) + result = await session.execute(stmt) + action_entity = result.scalars().first() + return action_entity + except Exception as e: + err = "根据团队ID、用户ID和操作获取操作失败" + logging.exception("[RoleManager] %s", err) + raise e diff --git a/data_chain/manager/session_manager.py b/data_chain/manager/session_manager.py new file mode 100644 index 0000000..e8c8153 --- /dev/null +++ b/data_chain/manager/session_manager.py @@ -0,0 +1,37 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from data_chain.logger.logger import logger as logging +from data_chain.stores.mongodb.mongodb import Session, MongoDB + + +class SessionManager: + """浏览器Session管理""" + + @staticmethod + async def verify_user(session_id: str) -> bool: + """验证用户是否在Session中""" + try: + collection = MongoDB().get_collection("session") + data = await collection.find_one({"_id": session_id}) + if not data: + return False + return Session(**data).user_sub is not None + except Exception as e: + err = "用户不在Session中" + logging.error("[SessionManager] %s", err) + raise e + + @staticmethod + async def get_user_sub(session_id: str) -> str | None: + """从Session中获取用户""" + try: + collection = MongoDB().get_collection("session") + data = await collection.find_one({"_id": session_id}) + if not data: + return None + user_sub = Session(**data).user_sub + except Exception as e: + err = "从Session中获取用户失败" + logging.error("[SessionManager] %s", err) + raise e + + return user_sub diff --git a/data_chain/manager/task_manager.py b/data_chain/manager/task_manager.py index 80060a5..a408419 100644 --- a/data_chain/manager/task_manager.py +++ b/data_chain/manager/task_manager.py @@ -3,291 +3,165 @@ from sqlalchemy import select, delete, update, desc, asc, func, exists, or_, and from sqlalchemy.orm import aliased import uuid from typing import Dict, List, Optional, Tuple +from data_chain.entities.request_data import ListTaskRequest from data_chain.logger.logger import logger as logging -from data_chain.stores.postgres.postgres import PostgresDB, TaskEntity, TaskStatusReportEntity, DocumentEntity, KnowledgeBaseEntity -from data_chain.models.constant import TaskConstant - +from data_chain.stores.database.database import DataBase, TaskEntity +from data_chain.entities.enum import TaskType, TaskStatus class TaskManager(): - @staticmethod - async def insert(entity: TaskEntity) -> TaskEntity: + async def add_task(task_entity: TaskEntity) -> TaskEntity: + """添加任务""" try: - async with await PostgresDB.get_session() as 
session: - session.add(entity) + async with await DataBase.get_session() as session: + session.add(task_entity) await session.commit() - await session.refresh(entity) # 刷新实体以获取可能由数据库生成的数据(例如自增ID) - return entity + await session.refresh(task_entity) except Exception as e: - logging.error(f"Failed to insert entity: {e}") - return None + err = "添加任务失败" + logging.exception("[TaskManager] %s", err) + raise e + return task_entity @staticmethod - async def update(task_id: uuid.UUID, update_dict: Dict): + async def get_task_by_task_id(task_id: uuid.UUID) -> TaskEntity: + """根据任务ID获取任务""" try: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.id == task_id) + async with await DataBase.get_session() as session: + stmt = select(TaskEntity).where(and_(TaskEntity.id == task_id, + TaskEntity.status != TaskStatus.DELETED.value)) result = await session.execute(stmt) - entity = result.scalars().first() - if entity is not None: - for key, value in update_dict.items(): - setattr(entity, key, value) - await session.commit() - await session.refresh(entity) # Refresh the entity to ensure it's up to date. 
- return entity + task_entity = result.scalars().first() + return task_entity except Exception as e: - logging.error(f"Failed to update entity: {e}") - return None + err = "获取任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def update_task_by_op_id(op_id: uuid.UUID, update_dict: Dict): + async def get_current_task_by_op_id(op_id: uuid.UUID) -> Optional[TaskEntity]: + """根据op_id获取当前最近的任务""" try: - async with await PostgresDB.get_session() as session: - stmt = update(TaskEntity).where(TaskEntity.op_id == op_id).values(**update_dict) - await session.execute(stmt) - await session.commit() - return True + async with await DataBase.get_session() as session: + stmt = select(TaskEntity).where(TaskEntity.op_id == op_id).order_by(desc(TaskEntity.created_time)) + result = await session.execute(stmt) + task_entity = result.scalars().first() + return task_entity except Exception as e: - logging.error(f"Failed to update entity: {e}") - return False + err = "获取任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def select_by_page(page_number: int, page_size: int, params: Dict) -> Tuple[int, List[TaskEntity]]: + async def get_task_by_task_status(task_status: str) -> List[TaskEntity]: + """根据任务状态获取任务""" try: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.status != TaskConstant.TASK_STATUS_DELETED) - stmt = stmt.where( - exists(select(1).where(or_( - DocumentEntity.id == TaskEntity.op_id, - KnowledgeBaseEntity.id == TaskEntity.op_id - ))) - ) - if 'user_id' in params: - stmt = stmt.where(TaskEntity.user_id == params['user_id']) - if 'id' in params: - stmt = stmt.where(TaskEntity.id == params['id']) - if 'op_id' in params: - stmt = stmt.where(TaskEntity.op_id == params['op_id']) - if 'types' in params: - stmt = stmt.where(TaskEntity.type.in_(params['types'])) - if 'status' in params: - stmt = stmt.where(TaskEntity.status == params['status']) - if 'created_time_order' 
in params: - if params['created_time_order'] == 'desc': - stmt = stmt.order_by(desc(TaskEntity.created_time)) - elif params['created_time_order'] == 'asc': - stmt = stmt.order_by(asc(TaskEntity.created_time)) - - # Execute the count part of the query separately - count_stmt = select(func.count()).select_from(stmt.subquery()) - total = (await session.execute(count_stmt)).scalar() - - # Apply pagination - stmt = stmt.offset((page_number-1)*page_size).limit(page_size) - - # Execute the main query + async with await DataBase.get_session() as session: + stmt = select(TaskEntity).where(TaskEntity.status == task_status) result = await session.execute(stmt) - task_list = result.scalars().all() - - return (total, task_list) + task_entity = result.scalars().all() + return task_entity except Exception as e: - logging.error(f"Failed to select tasks by page: {e}") - return (0, []) + err = "获取任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def select_by_id(id: uuid.UUID) -> TaskEntity: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.id == id) - result = await session.execute(stmt) - return result.scalar() - - @staticmethod - async def select_by_ids(ids: List[uuid.UUID]) -> List[TaskEntity]: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.id.in_(ids)) - result = await session.execute(stmt) - result = result.scalars().all() - return result - - @staticmethod - async def select_by_user_id(user_id: uuid.UUID) -> TaskEntity: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(and_(TaskEntity.user_id == user_id, - TaskEntity.status != TaskConstant.TASK_STATUS_DELETED)) - result = await session.execute(stmt) - return result.scalars().all() - - @staticmethod - async def select_by_op_id(op_id: uuid.UUID, method='one') -> TaskEntity: - async with await PostgresDB.get_session() as session: - stmt = 
select(TaskEntity).where( - TaskEntity.op_id == op_id).order_by( - desc(TaskEntity.created_time)) - if method=='one': - stmt=stmt.limit(1) - result = await session.execute(stmt) - if method == 'one': - result = result.scalars().first() - else: - result = result.scalars().all() - return result - @staticmethod - async def select_latest_task_by_op_ids(op_ids: List[uuid.UUID]) -> List[TaskEntity]: - async with await PostgresDB.get_session() as session: - # 创建一个别名用于子查询 - task_alias = aliased(TaskEntity) - - # 构建子查询,为每个op_id分配一个行号 - subquery = ( - select( - task_alias, - func.row_number().over( - partition_by=task_alias.op_id, - order_by=desc(task_alias.created_time) - ).label('row_num') + async def list_current_tasks_by_op_ids(op_ids: list[uuid.UUID]) -> List[TaskEntity]: + """根据op_id列表查询当前任务""" + try: + async with await DataBase.get_session() as session: + # 创建一个别名用于子查询 + task_alias = aliased(TaskEntity) + + # 构建子查询,为每个op_id分配一个行号 + subquery = ( + select( + task_alias, + func.row_number().over( + partition_by=task_alias.op_id, + order_by=desc(task_alias.created_time) + ).label('rn') + ) + .where(task_alias.op_id.in_(op_ids)) + .subquery() ) - .where(task_alias.op_id.in_(op_ids)) - .subquery() - ) - - # 主查询选择row_num为1的记录,即每个op_id的最新任务 - stmt = ( - select(TaskEntity) - .join(subquery, TaskEntity.id == subquery.c.id) - .where(subquery.c.row_num == 1) - ) - - result = await session.execute(stmt) - return result.scalars().all() - - @staticmethod - async def delete_by_op_id(op_id: uuid.UUID) -> TaskEntity: - async with await PostgresDB.get_session() as session: - stmt = delete(TaskEntity).where( - TaskEntity.op_id == op_id) - await session.execute(stmt) - - @staticmethod - async def select_by_op_ids(op_ids: List[uuid.UUID]) -> List[TaskEntity]: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.op_id.in_(op_ids)) - result = await session.execute(stmt) - return result.scalars().all() - - @staticmethod - async def 
select_by_user_id_and_task_type( - user_id: uuid.UUID, task_type: Optional[str] = None) -> List[TaskEntity]: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.user_id == user_id) - if task_type: - stmt = stmt.where(TaskEntity.type == task_type) - stmt = stmt.order_by(TaskEntity.created_time.desc()) - result = await session.execute(stmt) - return result.scalars().all() - - @staticmethod - async def select_by_user_id_and_task_type_list( - user_id: uuid.UUID, task_type_list: Optional[List[str]] = []) -> List[TaskEntity]: - async with await PostgresDB.get_session() as session: - stmt = select(TaskEntity).where(TaskEntity.user_id == user_id) - stmt = stmt.where(TaskEntity.type.in_(task_type_list)) - stmt = stmt.where(TaskEntity.status != TaskConstant.TASK_STATUS_DELETED) - stmt = stmt.order_by(TaskEntity.created_time.desc()) - result = await session.execute(stmt) - return result.scalars().all() - - @staticmethod - async def delete_by_id(id: uuid.UUID): - async with await PostgresDB.get_session() as session: - # 构建删除语句 - stmt = ( - select(TaskEntity) - .where(TaskEntity.id == id) - ) - # 执行删除操作 - result = await session.execute(stmt) - instances = result.scalars().all() - if instances: - for instance in instances: - await session.delete(instance) - await session.commit() - - @staticmethod - async def delete_by_ids(ids: List[uuid.UUID]): - async with await PostgresDB.get_session() as session: - # 构建删除语句 - stmt = ( - select(TaskEntity) - .where(TaskEntity.id.in_(ids)) - ) - # 执行删除操作 - result = await session.execute(stmt) - instances = result.scalars().all() - if instances: - for instance in instances: - await session.delete(instance) - await session.commit() - - -class TaskStatusReportManager(): + stmt = ( + select(TaskEntity) + .join(subquery, and_( + TaskEntity.op_id == subquery.c.op_id, + subquery.c.rn == 1, + TaskEntity.status != TaskStatus.DELETED.value + )) + ) + result = await session.execute(stmt) + task_entities = 
result.scalars().all() + return task_entities + except Exception as e: + err = "查询当前任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def insert(entity: TaskStatusReportEntity): - async with await PostgresDB.get_session() as session: - session.add(entity) - await session.commit() - return entity + async def list_task(user_sub: str, req: ListTaskRequest) -> Tuple[int, List[TaskEntity]]: + """列出任务""" + try: + async with await DataBase.get_session() as session: + stmt = select(TaskEntity).where(and_(TaskEntity.user_id == user_sub, + TaskEntity.status != TaskStatus.DELETED.value)) + if req.team_id: + stmt = stmt.where(TaskEntity.team_id == req.team_id) + if req.task_id: + stmt = stmt.where(TaskEntity.id == req.task_id) + if req.task_type: + stmt = stmt.where(TaskEntity.task_type == req.task_type.value) + if req.task_status: + stmt = stmt.where(TaskEntity.status == req.task_status.value) - @staticmethod - async def del_by_task_id(task_id: uuid.UUID): - async with await PostgresDB.get_session() as session: - stmt = ( - select(TaskStatusReportEntity) - .where(TaskStatusReportEntity.task_id == task_id) - ) - result = await session.execute(stmt) - entities = result.scalars().all() - for entity in entities: - await session.delete(entity) - await session.commit() + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + stmt = stmt.order_by(TaskEntity.created_time.desc()) + result = await session.execute(stmt) + task_entities = result.scalars().all() + return total, task_entities + except Exception as e: + err = "列出任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def select_by_task_id(task_id: uuid.UUID, limited: int = 10): - async with await PostgresDB.get_session() as session: - stmt = ( - select(TaskStatusReportEntity) - .where(TaskStatusReportEntity.task_id == task_id) - 
.order_by(desc(TaskStatusReportEntity.created_time)) - .limit(limited) - ) - result = await session.execute(stmt) - return result.scalars().all() + async def list_task_by_user_sub_and_team_id_and_type( + user_sub: str, team_id: uuid.UUID, task_type: TaskType) -> List[TaskEntity]: + """根据用户ID、团队ID和任务类型查询任务""" + try: + async with await DataBase.get_session() as session: + stmt = select(TaskEntity).where(and_(TaskEntity.user_id == user_sub, + TaskEntity.team_id == team_id, + TaskEntity.task_type == task_type.value, + TaskEntity.status != TaskStatus.DELETED.value)) + result = await session.execute(stmt) + task_entities = result.scalars().all() + return task_entities + except Exception as e: + err = "查询任务失败" + logging.exception("[TaskManager] %s", err) + raise e @staticmethod - async def select_latest_report_by_task_ids(task_ids: List[uuid.UUID]) -> List[TaskStatusReportEntity]: - async with await PostgresDB.get_session() as session: - # 创建一个别名用于子查询 - report_alias = aliased(TaskStatusReportEntity) - - # 构建子查询,为每个task_id分配一个行号 - subquery = ( - select( - report_alias, - func.row_number().over( - partition_by=report_alias.task_id, - order_by=desc(report_alias.created_time) - ).label('row_num') - ) - .where(report_alias.task_id.in_(task_ids)) - .subquery() - ) - # 主查询选择row_num为1的记录,即每个op_id的最新任务 - stmt = ( - select(TaskStatusReportEntity) - .join(subquery, TaskStatusReportEntity.id == subquery.c.id) - .where(subquery.c.row_num == 1) - ) - - result = await session.execute(stmt) - return result.scalars().all() + async def update_task_by_id(task_id: uuid.UUID, task_dict: Dict) -> TaskEntity: + """根据任务ID更新任务""" + try: + async with await DataBase.get_session() as session: + stmt = update(TaskEntity).where(TaskEntity.id == task_id).values(**task_dict) + await session.execute(stmt) + await session.commit() + stmt = select(TaskEntity).where(TaskEntity.id == task_id) + result = await session.execute(stmt) + task_entity = result.scalars().first() + return task_entity + except 
Exception as e: + err = "更新任务失败" + logging.exception("[TaskManager] %s", err) + raise e diff --git a/data_chain/manager/task_queue_mamanger.py b/data_chain/manager/task_queue_mamanger.py new file mode 100644 index 0000000..4523e19 --- /dev/null +++ b/data_chain/manager/task_queue_mamanger.py @@ -0,0 +1,61 @@ + +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +from sqlalchemy import select, delete, update, desc, asc, func, exists, or_, and_ +from sqlalchemy.orm import aliased +import uuid +from typing import Dict, List, Optional, Tuple +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import DataBase, TaskEntity +from data_chain.stores.mongodb.mongodb import MongoDB, Task +from data_chain.entities.enum import TaskStatus + + +class TaskQueueManager(): + """任务队列管理类""" + + @staticmethod + async def add_task(task: Task): + try: + async with MongoDB.get_session() as session, await session.start_transaction(): + task_colletion = MongoDB.get_collection('witchiand_task') + await task_colletion.insert_one(task.model_dump(by_alias=True), session=session) + except Exception as e: + err = "添加任务到队列失败" + logging.exception("[TaskQueueManager] %s", err) + + @staticmethod + async def delete_task_by_id(task_id: uuid.UUID): + """根据任务ID删除任务""" + try: + async with MongoDB.get_session() as session, await session.start_transaction(): + task_colletion = MongoDB.get_collection('witchiand_task') + await task_colletion.delete_one({"_id": task_id}, session=session) + except Exception as e: + err = "删除任务失败" + logging.exception("[TaskQueueManager] %s", err) + raise e + + @staticmethod + async def get_oldest_tasks_by_status(status: TaskStatus) -> Task: + """根据任务状态获取最早的任务""" + try: + async with MongoDB.get_session() as session: + task_colletion = MongoDB.get_collection('witchiand_task') + task = await task_colletion.find_one({"status": status.value}, sort=[("created_time", 1)], session=session) + return Task(**task) if 
task else None + except Exception as e: + err = "获取最早的任务失败" + logging.exception("[TaskQueueManager] %s", err) + raise e + + @staticmethod + async def update_task_by_id(task_id: uuid.UUID, task: Task): + """根据任务ID更新任务""" + try: + async with MongoDB.get_session() as session, await session.start_transaction(): + task_colletion = MongoDB.get_collection('witchiand_task') + await task_colletion.update_one({"_id": task_id}, {"$set": task.model_dump(by_alias=True)}, session=session) + except Exception as e: + err = "更新任务失败" + logging.exception("[TaskQueueManager] %s", err) + raise e diff --git a/data_chain/manager/task_report_manager.py b/data_chain/manager/task_report_manager.py new file mode 100644 index 0000000..b9d7dfe --- /dev/null +++ b/data_chain/manager/task_report_manager.py @@ -0,0 +1,74 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +from sqlalchemy import select, delete, update, desc, asc, func, exists, or_, and_ +from sqlalchemy.orm import aliased +import uuid +from typing import Dict, List, Optional, Tuple +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import DataBase, TaskReportEntity +from data_chain.entities.enum import TaskStatus + + +class TaskReportManager(): + @staticmethod + async def add_task_report(task_report_entity: TaskReportEntity) -> TaskReportEntity: + """添加任务报告""" + try: + async with await DataBase.get_session() as session: + session.add(task_report_entity) + await session.commit() + await session.refresh(task_report_entity) + return task_report_entity + except Exception as e: + err = "添加任务报告失败" + logging.exception("[TaskReportManager] %s", err) + + @staticmethod + async def list_current_task_report_by_task_ids(task_ids: List[uuid.UUID]) -> List[TaskReportEntity]: + """根据任务ID列表查询当前任务报告""" + try: + async with await DataBase.get_session() as session: + # 创建一个别名用于子查询 + report_alias = aliased(TaskReportEntity) + + # 构建子查询,为每个task_id分配一个行号 + subquery = ( + 
select( + report_alias, + func.row_number().over( + partition_by=report_alias.task_id, + order_by=desc(report_alias.created_time) + ).label('rn') + ) + .where(report_alias.task_id.in_(task_ids)) + .subquery() + ) + stmt = ( + select(TaskReportEntity) + .join(subquery, and_( + TaskReportEntity.task_id == subquery.c.task_id, + subquery.c.rn == 1 + )) + ) + result = await session.execute(stmt) + task_report_entities = result.scalars().all() + return task_report_entities + except Exception as e: + err = "查询当前任务报告失败" + logging.exception("[TaskReportManager] %s", err) + raise e + + @staticmethod + async def list_all_task_report_by_task_id(task_id: uuid.UUID) -> List[TaskReportEntity]: + """根据任务ID查询所有任务报告""" + try: + async with await DataBase.get_session() as session: + stmt = select(TaskReportEntity).where(TaskReportEntity.task_id == task_id) + stmt = stmt.order_by(TaskReportEntity.created_time.desc()) + stmt = stmt.order_by(TaskReportEntity.id.asc()) + result = await session.execute(stmt) + task_report_entities = result.scalars().all() + return task_report_entities + except Exception as e: + err = "查询所有任务报告失败" + logging.exception("[TaskReportManager] %s", err) + raise e diff --git a/data_chain/manager/team_manager.py b/data_chain/manager/team_manager.py new file mode 100644 index 0000000..9efe572 --- /dev/null +++ b/data_chain/manager/team_manager.py @@ -0,0 +1,152 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from sqlalchemy import select, update, delete, and_, func +from typing import Dict +import uuid + +from data_chain.logger.logger import logger as logging +from data_chain.entities.request_data import ListTeamRequest +from data_chain.entities.enum import TeamStatus +from data_chain.stores.database.database import DataBase, TeamEntity, TeamUserEntity + + +class TeamManager: + + @staticmethod + async def add_team(team_entity: TeamEntity) -> TeamEntity: + """添加团队""" + try: + async with await DataBase.get_session() as session: + session.add(team_entity) + await session.commit() + await session.refresh(team_entity) + except Exception as e: + err = "添加团队失败" + logging.exception("[TeamManger] %s", err) + raise e + return team_entity + + @staticmethod + async def add_team_user(team_user_entity: TeamUserEntity) -> TeamUserEntity: + """添加团队成员""" + try: + async with await DataBase.get_session() as session: + session.add(team_user_entity) + await session.commit() + await session.refresh(team_user_entity) + except Exception as e: + err = "添加团队成员失败" + logging.exception("[TeamManger] %s", err) + return team_user_entity + + @staticmethod + async def list_team_myjoined_by_user_sub(user_sub: str, req: ListTeamRequest) -> list[TeamEntity]: + """列出我加入的团队,以及总数""" + try: + async with await DataBase.get_session() as session: + stmt = select(TeamEntity).join(TeamUserEntity, TeamEntity.id == TeamUserEntity.team_id).where( + and_(TeamUserEntity.user_id == user_sub, TeamEntity.status != TeamStatus.DELETED.value)) + if req.team_id: + stmt = stmt.where(TeamEntity.id == req.team_id) + if req.team_name: + stmt = stmt.where(TeamEntity.name.ilike(f"%{req.team_name}%")) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.limit(req.page_size).offset((req.page - 1) * req.page_size) + stmt = stmt.order_by(TeamEntity.created_time.desc()) + result = await session.execute(stmt) + team_entities = result.scalars().all() + 
return (total, team_entities) + except Exception as e: + err = "列出我加入的团队失败" + logging.exception("[TeamManager] %s", err) + raise e + + @staticmethod + async def list_team_mycreated_user_sub(user_sub: str, req: ListTeamRequest) -> list[TeamEntity]: + """列出我创建的团队""" + try: + async with await DataBase.get_session() as session: + stmt = select(TeamEntity).where(and_( + TeamEntity.author_id == user_sub, TeamEntity.status != TeamStatus.DELETED.value)) + if req.team_id: + stmt = stmt.where(TeamEntity.id == req.team_id) + if req.team_name: + stmt = stmt.where(TeamEntity.name.ilike(f"%{req.team_name}%")) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.limit(req.page_size).offset((req.page - 1) * req.page_size) + stmt = stmt.order_by(TeamEntity.created_time.desc()) + result = await session.execute(stmt) + team_entities = result.scalars().all() + return (total, team_entities) + except Exception as e: + err = "列出我创建的团队失败" + logging.exception("[TeamManager] %s", err) + raise e + + @staticmethod + async def list_pulic_team(req: ListTeamRequest) -> list[TeamEntity]: + """列出公开的团队""" + try: + async with await DataBase.get_session() as session: + stmt = select(TeamEntity).where(and_( + TeamEntity.status != TeamStatus.DELETED.value, TeamEntity.is_public == True)) + if req.team_id: + stmt = stmt.where(TeamEntity.id == req.team_id) + if req.team_name: + stmt = stmt.where(TeamEntity.name.ilike(f"%{req.team_name}%")) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.limit(req.page_size).offset((req.page - 1) * req.page_size) + stmt = stmt.order_by(TeamEntity.created_time.desc()) + result = await session.execute(stmt) + team_entities = result.scalars().all() + return (total, team_entities) + except Exception as e: + err = "列出公开的团队失败" + logging.exception("[TeamManager] %s", err) + raise e + + @staticmethod + async def 
delete_team_by_id(team_id: uuid.UUID) -> uuid.UUID: + """删除团队""" + try: + async with await DataBase.get_session() as session: + stmt = delete(TeamEntity).where(TeamEntity.id == team_id) + await session.execute(stmt) + await session.commit() + except Exception as e: + err = "删除团队失败" + logging.exception("[TeamManager] %s", err) + raise e + return team_id + + @staticmethod + async def delete_teams_deleted() -> None: + """删除团队""" + try: + async with await DataBase.get_session() as session: + stmt = delete(TeamEntity).where(TeamEntity.status == TeamStatus.DELETED.value) + await session.execute(stmt) + await session.commit() + except Exception as e: + err = "删除团队失败" + logging.exception("[TeamManager] %s", err) + raise e + + @staticmethod + async def update_team_by_id(team_id: uuid.UUID, team_dict: Dict[str, str]) -> TeamEntity: + """更新团队""" + try: + async with await DataBase.get_session() as session: + stmt = update(TeamEntity).where(TeamEntity.id == team_id).values(**team_dict) + await session.execute(stmt) + await session.commit() + stmt = select(TeamEntity).where(TeamEntity.id == team_id) + result = await session.execute(stmt) + team_entity = result.scalars().first() + return team_entity + except Exception as e: + err = "更新团队失败" + logging.exception("[TeamManager] %s", err) + raise e diff --git a/data_chain/manager/testcase_manager.py b/data_chain/manager/testcase_manager.py new file mode 100644 index 0000000..95c9300 --- /dev/null +++ b/data_chain/manager/testcase_manager.py @@ -0,0 +1,74 @@ + +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+from sqlalchemy import select, delete, update, desc, asc, func, exists, or_, and_ +from sqlalchemy.orm import aliased +import uuid +from typing import Dict, List, Optional, Tuple +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import DataBase, TestingEntity, TestCaseEntity + + +class TestCaseManager(): + """测试用例管理类""" + + @staticmethod + async def add_test_case(test_case_entity: TestCaseEntity) -> TestCaseEntity: + """添加测试用例""" + try: + async with await DataBase.get_session() as session: + session.add(test_case_entity) + await session.commit() + await session.refresh(test_case_entity) + return test_case_entity + except Exception as e: + err = "添加测试用例失败" + logging.exception("[TestCaseManager] %s", err) + + @staticmethod + async def add_test_cases(test_case_entities: List[TestCaseEntity]) -> List[TestCaseEntity]: + """批量添加测试用例""" + try: + async with await DataBase.get_session() as session: + session.add_all(test_case_entities) + await session.commit() + for test_case_entity in test_case_entities: + await session.refresh(test_case_entity) + return test_case_entities + except Exception as e: + err = "批量添加测试用例失败" + logging.exception("[TestCaseManager] %s", err) + + @staticmethod + async def list_test_case_by_testing_id(testing_id: uuid.UUID) -> List[TestCaseEntity]: + """根据测试ID查询测试用例""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(TestCaseEntity) + .where(TestCaseEntity.testing_id == testing_id) + ) + stmt = stmt.order_by(TestCaseEntity.created_at.desc()) + stmt = stmt.order_by(TestCaseEntity.id.asc()) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "查询测试用例失败" + logging.exception("[TestCaseManager] %s", err) + raise e + + @staticmethod + async def update_test_case_by_testing_id(testing_id: uuid.UUID, test_case_dict: Dict[str, str]) -> None: + """根据测试ID更新测试用例""" + try: + async with await DataBase.get_session() as session: + stmt = 
( + update(TestCaseEntity) + .where(TestCaseEntity.testing_id == testing_id) + .values(**test_case_dict) + ) + await session.execute(stmt) + await session.commit() + except Exception as e: + err = "更新测试用例失败" + logging.exception("[TestCaseManager] %s", err) + raise e diff --git a/data_chain/manager/testing_manager.py b/data_chain/manager/testing_manager.py new file mode 100644 index 0000000..86689db --- /dev/null +++ b/data_chain/manager/testing_manager.py @@ -0,0 +1,151 @@ + +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +from sqlalchemy import select, delete, update, desc, asc, func, exists, or_, and_ +from sqlalchemy.orm import aliased +import uuid +from typing import Dict, List, Optional, Tuple +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import DataBase, DataSetEntity, TestingEntity, TaskEntity +from data_chain.entities.request_data import ListTestingRequest +from data_chain.entities.enum import DataSetStatus, TestingStatus, TaskStatus + + +class TestingManager(): + """测试管理类""" + + @staticmethod + async def add_testing(testing_entity: TestingEntity) -> TestingEntity: + """添加测试""" + try: + async with await DataBase.get_session() as session: + session.add(testing_entity) + await session.commit() + await session.refresh(testing_entity) + return testing_entity + except Exception as e: + err = "添加测试失败" + logging.exception("[TestingManager] %s", err) + + @staticmethod + async def get_testing_by_testing_id(testing_id: uuid.UUID) -> Optional[TestingEntity]: + """根据测试ID查询测试""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(TestingEntity) + .where(TestingEntity.testing_id == testing_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "查询测试失败" + logging.exception("[TestingManager] %s", err) + raise e + + @staticmethod + async def list_testing_by_dataset_id(dataset_id: uuid.UUID) -> 
List[TestingEntity]: + """根据数据集ID查询测试""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(TestingEntity) + .where(TestingEntity.dataset_id == dataset_id) + ) + stmt = stmt.where(TestingEntity.status != TestingStatus.DELETED.value) + stmt = stmt.order_by(desc(TestingEntity.created_at)) + stmt = stmt.order_by(asc(TestingEntity.id)) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "查询测试失败" + logging.exception("[TestingManager] %s", err) + raise e + + @staticmethod + async def list_testing_unique_dataset_ids(req: ListTestingRequest) -> tuple[int, List[uuid.UUID]]: + """根据知识库ID查询测试""" + try: + async with await DataBase.get_session() as session: + subq = (select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( + partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn')).subquery()) + + stmt = ( + select(TestingEntity.dataset_id) + .outerjoin(subq, and_(TestingEntity.id == subq.c.op_id, subq.c.rn == 1)) + .outerjoin(DataSetEntity, TestingEntity.dataset_id == DataSetEntity.id) + ) + stmt = stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) + stmt = stmt.where(TestingEntity.status != TestingStatus.DELETED.value) + if req.kb_id: + stmt = stmt.where(TestingEntity == req.kb_id) + if req.testing_id: + stmt = stmt.where(TestingEntity.id == req.testing_id) + if req.testing_name: + stmt = stmt.where(TestingEntity.name.ilike(f"%{req.testing_name}%")) + if req.llm_id: + stmt = stmt.where(TestingEntity.llm_id == req.llm_id) + if req.run_status: + stmt = stmt.where(subq.c.status.in_([status.value for status in req.run_status])) + if req.author_name: + stmt = stmt.where(TestingEntity.author_name.ilike(f"%{req.author_name}%")) + stmt = stmt.distinct(TestingEntity.dataset_id) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total = (await session.execute(count_stmt)).scalar() + stmt = stmt.order_by(desc(TestingEntity.created_at)) + 
stmt = stmt.order_by(asc(TestingEntity.id)) + stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + result = await session.execute(stmt) + dataset_ids = result.scalars().all() + return total, dataset_ids + except Exception as e: + err = "查询测试关联的dataset_id失败" + logging.exception("[TestingManager] %s", err) + raise e + + @staticmethod + async def update_testing_by_testing_id(testing_id: uuid.UUID, testing_dict: Dict[str, str]) -> TestingEntity: + """根据测试ID更新测试""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(TestingEntity) + .where(TestingEntity.testing_id == testing_id) + .values(**testing_dict) + ) + await session.execute(stmt) + await session.commit() + stmt = ( + select(TestingEntity) + .where(TestingEntity.testing_id == testing_id) + ) + result = await session.execute(stmt) + return result.scalars().first() + except Exception as e: + err = "更新测试失败" + logging.exception("[TestingManager] %s", err) + raise e + + @staticmethod + async def update_testing_by_testing_ids( + testing_ids: list[uuid.UUID], + testing_dict: Dict[str, str]) -> list[TestingEntity]: + """批量更新测试""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(TestingEntity) + .where(TestingEntity.testing_id.in_(testing_ids)) + .values(**testing_dict) + ).returning(TestingEntity) + await session.execute(stmt) + await session.commit() + stmt = ( + select(TestingEntity) + .where(TestingEntity.testing_id.in_(testing_ids)) + ) + result = await session.execute(stmt) + return result.scalars().all() + except Exception as e: + err = "批量更新测试失败" + logging.exception("[TestingManager] %s", err) + raise e diff --git a/data_chain/manager/touch b/data_chain/manager/touch new file mode 100644 index 0000000..e69de29 diff --git a/data_chain/manager/user_manager.py b/data_chain/manager/user_manager.py index 90c9eb7..2277872 100644 --- a/data_chain/manager/user_manager.py +++ b/data_chain/manager/user_manager.py @@ -1,106 +1,22 @@ -# 
Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from sqlalchemy import select,delete +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +from sqlalchemy import select, delete from data_chain.logger.logger import logger as logging -from data_chain.stores.postgres.postgres import PostgresDB, User - - +from data_chain.entities.enum import UserStatus +from data_chain.stores.database.database import DataBase, UserEntity class UserManager: @staticmethod - async def add_user(name,email, account, passwd): - user_slice = User( - name=name, - email=email, - account=account, - passwd=passwd - ) + async def add_user(user_entity: UserEntity) -> bool: try: - async with await PostgresDB.get_session() as session: - session.add(user_slice) - await session.commit() - await session.refresh(user_slice) - except Exception as e: - logging.error(f"Add user failed due to error: {e}") - return None - return user_slice - - @staticmethod - async def del_user_by_user_id(user_id): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取并删除用户 - stmt = select(User).where(User.id == user_id) - result = await session.execute(stmt) - user_to_delete = result.scalars().first() - - if user_to_delete is not None: - delete_stmt = delete(User).where(User.id==user_id) - result = await session.execute(delete_stmt) - await session.commit() - except Exception as e: - logging.error(f"Delete user failed due to error: {e}") - return False - return True - - @staticmethod - async def update_user_by_user_id(user_id, tmp_dict: dict): - try: - async with await PostgresDB.get_session() as session: - # 使用执行SQL语句的方式获取用户 - stmt = select(User).where(User.id == user_id) - result = await session.execute(stmt) - user_to_update = result.scalars().first() - - if user_to_update is None: - raise ValueError(f"No user found with id {user_id}") - - # 更新用户属性 - for key, value in tmp_dict.items(): - if hasattr(user_to_update, key): - 
setattr(user_to_update, key, value) - else: - logging.error(f"Attribute {key} does not exist on User model") - + async with await DataBase.get_session() as session: + session.add(user_entity) await session.commit() + await session.refresh(user_entity) return True except Exception as e: - logging.error(f"Failed to update user: {e}") - return False - - @staticmethod - async def get_user_info_by_account(account): - try: - async with await PostgresDB.get_session() as session: - stmt = select(User).where(User.account == account) - result = await session.execute(stmt) - user = result.scalars().first() - return user - except Exception as e: - logging.error(f"Failed to get user info by account: {e}") - return None - @staticmethod - async def get_user_info_by_email(email): - try: - async with await PostgresDB.get_session() as session: - stmt = select(User).where(User.email == email) - result = await session.execute(stmt) - user = result.scalars().first() - return user - except Exception as e: - logging.error(f"Failed to get user info by account: {e}") - return None - @staticmethod - async def get_user_info_by_user_id(user_id): - result = None - try: - async with await PostgresDB.get_session() as session: - stmt = select(User).where(User.id == user_id) - result = await session.execute(stmt) - result = result.scalars().first() - except Exception as e: - logging.error(f"Get user failed due to error: {e}") - return result - + err = "用户添加失败" + logging.exception("[UserManger] %s", err) + return False diff --git a/data_chain/manager/vector_items_manager.py b/data_chain/manager/vector_items_manager.py deleted file mode 100644 index bfc9a9a..0000000 --- a/data_chain/manager/vector_items_manager.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-import uuid -from typing import List -from sqlalchemy import insert, delete, update, text -import traceback -from data_chain.logger.logger import logger as logging -from data_chain.stores.postgres.postgres import PostgresDB,TemporaryVectorItemstEntity - - - - -class VectorItemsManager: - @staticmethod - async def add(VectorItems, vector): - # 构建插入语句 - insert_stmt = insert(VectorItems).values( - user_id=vector['user_id'], - chunk_id=vector['chunk_id'], - kb_id=vector['kb_id'], - document_id=vector['doc_id'], - vector=vector['vector'], - enabled=vector['enabled'] - ).returning(VectorItems.c.id) - - # 获取会话 - async with await PostgresDB.get_session() as session: - result = await session.execute(insert_stmt) - inserted_id = result.scalar() - await session.commit() - return inserted_id - @staticmethod - async def add_all(VectorItems, vector_list): - # 构建插入语句 - insert_stmt = ( - insert(VectorItems) - .values([ - { - "user_id": vector['user_id'], - "chunk_id": vector['chunk_id'], - "kb_id": vector['kb_id'], - "document_id": vector['doc_id'], - "vector": vector['vector'], - "enabled": vector['enabled'] - } - for vector in vector_list - ]) - .returning(VectorItems.c.id) # 假设VectorItems有id字段 - ) - - # 获取会话 - async with await PostgresDB.get_session() as session: - result = await session.execute(insert_stmt) - inserted_ids = result.scalars().all() - await session.commit() - return inserted_ids - @staticmethod - async def del_by_id(VectorItems, id): - try: - # 构建删除语句 - delete_stmt = delete(VectorItems).where(VectorItems.c.id == id) - - # 获取会话 - async with await PostgresDB.get_session() as session: - await session.execute(delete_stmt) - await session.commit() - except Exception as e: - logging.error(f"Delete vector item failed due to error: {e}") - - @staticmethod - async def del_by_chunk_ids(VectorItems, chunk_ids): - try: - # 构建删除语句 - delete_stmt = delete(VectorItems).where(VectorItems.c.chunk_id.in_(chunk_ids)) - - # 获取会话 - async with await PostgresDB.get_session() as 
session: - await session.execute(delete_stmt) - await session.commit() - except Exception as e: - logging.error(f"Delete vector item failed due to error: {e}") - - @staticmethod - async def del_by_doc_ids(VectorItems, doc_ids): - try: - # 构建删除语句 - delete_stmt = delete(VectorItems).where(VectorItems.c.doc_id.in_(doc_ids)) - - # 获取会话 - async with await PostgresDB.get_session() as session: - await session.execute(delete_stmt) - await session.commit() - except Exception as e: - logging.error(f"Delete vector item failed due to error: {e}") - - @staticmethod - async def update_by_chunk_id(VectorItems, chunk_id, up_dict): - try: - # 构建删除语句 - update_stmt = update(VectorItems).where(VectorItems.c.chunk_id == chunk_id).values(**up_dict) - # 获取会话 - async with await PostgresDB.get_session() as session: - await session.execute(update_stmt) - await session.commit() - except Exception as e: - logging.error(f"Update vector item failed due to error: {e}") - @staticmethod - async def find_top_k_similar_vectors(VectorItems, target_vector, kb_id, topk=3, banned_ids=[]): - try: - if topk<=0: - return [] - # 构造查询 - if banned_ids: - query_sql = ( - f"SELECT v.chunk_id " - f"FROM \"{VectorItems.name}\" AS v " - f"INNER JOIN document ON v.document_id = document.id " - f"WHERE v.kb_id = :kb_id AND v.chunk_id!=ANY(:banned_ids) AND v.enabled = true AND document.enabled = true " - f"ORDER BY v.vector <=> :target_vector " - f"LIMIT :topk") - else: - query_sql = ( - f"SELECT v.chunk_id " - f"FROM \"{VectorItems.name}\" AS v " - f"INNER JOIN document ON v.document_id = document.id " - f"WHERE v.kb_id = :kb_id AND v.enabled = true AND document.enabled = true " - f"ORDER BY v.vector <=> :target_vector " - f"LIMIT :topk") - async with await PostgresDB.get_session() as session: - # 使用execute执行原始SQL语句,并传递参数 - result = await session.execute( - text(query_sql), - { - "kb_id": kb_id, - "banned_ids": banned_ids, - "target_vector": str(target_vector), - "topk": topk - } - ) - result = 
result.scalars().all() - return result - except Exception as e: - logging.error(f"Query for similar vectors failed due to error: {e}") - logging.error(f"Error details: {traceback.format_exc()}") - return [] -class TemporaryVectorItemsManager: - @staticmethod - async def add(temporary_vector_items_entity:TemporaryVectorItemstEntity)->TemporaryVectorItemstEntity: - try: - async with await PostgresDB.get_session() as session: - session.add(temporary_vector_items_entity) - await session.commit() - await session.refresh(temporary_vector_items_entity) - return temporary_vector_items_entity - except Exception as e: - logging.error(f"Add temporary vector items failed due to error: {e}") - return None - @staticmethod - async def add_all(temporary_vector_items_entity_list:List[TemporaryVectorItemstEntity])->List[TemporaryVectorItemstEntity]: - try: - async with await PostgresDB.get_session() as session: - session.add_all(temporary_vector_items_entity_list) - await session.commit() - for temporary_vector_items_entity in temporary_vector_items_entity_list: - await session.refresh(temporary_vector_items_entity) - return temporary_vector_items_entity_list - except Exception as e: - logging.error(f"Add temporary vector items failed due to error: {e}") - return None - @staticmethod - async def find_top_k_similar_temporary_vectors(target_vector, document_ids:List[uuid.UUID],topk=3)->List[uuid.UUID]: - try: - if topk<=0: - return [] - # 构造查询 - if document_ids: - query_sql = ( - f"SELECT v.chunk_id " - f"FROM temporary_vector_items AS v " - f"INNER JOIN temporary_document ON v.document_id = temporary_document.id " - f"WHERE v.document_id= ANY(:document_ids) AND temporary_document.status!='deleted'" - f"ORDER BY v.vector <=> :target_vector " - f"LIMIT :topk" - ) - else: - return [] - # 获取会话并执行查询 - async with await PostgresDB.get_session() as session: - # 使用execute执行原始SQL语句,并传递参数 - result = await session.execute( - text(query_sql), - { - "document_ids": document_ids, - "target_vector": 
str(target_vector), - "topk": topk - } - ) - result = result.scalars().all() - return result - except Exception as e: - logging.error(f"Query for similar temporary vectors failed due to error: {e}") - return [] \ No newline at end of file diff --git a/data_chain/models/api.py b/data_chain/models/api.py deleted file mode 100644 index 319ee25..0000000 --- a/data_chain/models/api.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -# TODO: 给其中某些属性加上参数约束, 例如page或者count之类的 -import re -import uuid -from typing import Dict, Generic, List, Optional, TypeVar - -from data_chain.models.service import DocumentTypeDTO - -from pydantic import BaseModel, Field, validator, constr - -T = TypeVar('T') - - -class DictionaryBaseModel(BaseModel): - def keys(self): - return [key for key in self.__dict__ if getattr(self, key) is not None] - - def __getitem__(self, item): - return getattr(self, item) - - -class BaseResponse(BaseModel, Generic[T]): - retcode: int = 200 - retmsg: str = "ok" - data: Optional[T] - - -class Page(DictionaryBaseModel, Generic[T]): - page_number: int = 1 - page_size: int = 10 - total: int - data_list: Optional[List[T]] - - -class CreateKnowledgeBaseRequest(DictionaryBaseModel): - name: str = Field(..., min_length=1, max_length=150) - language: str = Field(..., pattern=r"^(zh|en)$") - description: Optional[str] = Field(None, max_length=150) - embedding_model: str = Field(..., pattern=r"^(bge_large_zh|bge_large_en)$") - default_parser_method: str - default_chunk_size: int = Field(1024, ge=128, le=1024) - document_type_list: Optional[List[str]] - - -class UpdateKnowledgeBaseRequest(DictionaryBaseModel): - id: uuid.UUID - name: Optional[str] = Field(None, min_length=1, max_length=150) - language: Optional[str] = Field(None, pattern=r"^(zh|en)$") - description: Optional[str] - embedding_model: Optional[str] = Field(None, pattern=r"^(bge_large_zh|bge_large_en)$") - default_parser_method: Optional[str] = None 
- default_chunk_size: Optional[int] = Field(None, ge=128, le=1024) - document_type_list: Optional[List[DocumentTypeDTO]] = None - - -class ListKnowledgeBaseRequest(DictionaryBaseModel): - id: Optional[uuid.UUID] = None - name: Optional[str] = None - document_count_order: Optional[str] = 'desc' # 取值desc降序, asc升序 - created_time_order: Optional[str] = 'desc' # 取值desc降序, asc升序 - created_time_start: Optional[str] = None - created_time_end: Optional[str] = None - page_number: int = 1 - page_size: int = 10 - - -class DeleteKnowledgeBaseRequest(DictionaryBaseModel): - id: uuid.UUID - - -class ExportKnowledgeBaseRequest(DictionaryBaseModel): - id: uuid.UUID - - -class ListKnowledgeBaseTaskRequest(DictionaryBaseModel): - pass - - -class StopTaskRequest(DictionaryBaseModel): - task_id: uuid.UUID - - -class RmoveTaskRequest(DictionaryBaseModel): - task_id: Optional[uuid.UUID] = None - types: Optional[List[str]] = None - - -class ListTaskRequest(DictionaryBaseModel): - id: Optional[uuid.UUID] = None - op_id: Optional[uuid.UUID] = None - types: Optional[List[str]] = None - status: Optional[str] = None - page_number: int = 1 - page_size: int = 10 - created_time_order: Optional[str] = 'desc' # 取值desc降序, asc升序 - - -class ListDocumentRequest(DictionaryBaseModel): - kb_id: Optional[uuid.UUID] = None - id: Optional[uuid.UUID] = None - name: Optional[str] = None - document_type_list: Optional[List[uuid.UUID]] = None - created_time_order: Optional[str] = 'desc' - created_time_start: Optional[str] = None - created_time_end: Optional[str] = None - status: Optional[List[str]] = None - enabled: Optional[bool] = None - parser_method: Optional[List[str]] = None - page_number: int = 1 - page_size: int = 10 - - @validator('status', each_item=True) - def check_types(cls, v): - # 定义允许的类型正则表达式 - allowed_type_pattern = r"^(pending|success|failed|running|canceled)$" - if not re.match(allowed_type_pattern, v): - raise ValueError(f'Invalid type value "{v}". 
Must match pattern {allowed_type_pattern}.') - return v - - -class UpdateDocumentRequest(DictionaryBaseModel): - id: uuid.UUID - name: Optional[str] = Field(None, min_length=1, max_length=128) - parser_method: Optional[str] = Field(None, pattern=r"^(general|ocr|enhanced)$") - type_id: Optional[uuid.UUID] = None - chunk_size: Optional[int] = Field(None, gt=127, lt=1025) - - -class RunDocumentRequest(DictionaryBaseModel): - ids: List[uuid.UUID] - run: str = Field(..., pattern=r"^(run|cancel)$") # run运行或者cancel取消 - - -class SwitchDocumentRequest(DictionaryBaseModel): - id: uuid.UUID - enabled: bool # True启用, False未启用 - - -class DeleteDocumentRequest(DictionaryBaseModel): - ids: List[uuid.UUID] - - -class DownloadDocumentRequest(DictionaryBaseModel): - ids: List[uuid.UUID] - - -class GetTemporaryDocumentStatusRequest(DictionaryBaseModel): - ids: List[uuid.UUID] - - -class TemporaryDocumentInParserRequest(DictionaryBaseModel): - id: uuid.UUID - name: str = Field(..., min_length=1, max_length=128) - type: str = Field(..., min_length=1, max_length=128) - bucket_name: str = Field(..., min_length=1, max_length=128) - parser_method: str = Field("ocr", pattern=r"^(general|ocr)$") - chunk_size: int = Field(1024, ge=128, le=1024) - - -class ParserTemporaryDocumenRequest(DictionaryBaseModel): - document_list: List[TemporaryDocumentInParserRequest] - - -class DeleteTemporaryDocumentRequest(DictionaryBaseModel): - ids: List[uuid.UUID] - - -class RelatedTemporaryDocumenRequest(DictionaryBaseModel): - content: str - top_k: int = Field(5, ge=0, le=10) - kb_sn: Optional[uuid.UUID] = None - document_ids: Optional[List[uuid.UUID]] = None - - -class ListChunkRequest(DictionaryBaseModel): - document_id: uuid.UUID - text: Optional[str] = None - page_number: int = 1 - types: Optional[List[str]] = None - page_size: int = 10 - # 定义一个验证器来确保types中的每个元素都符合正则表达式 - - @validator('types', each_item=True) - def check_types(cls, v): - # 定义允许的类型正则表达式 - allowed_type_pattern = r"^(para|table|image)$" # 
替换为你需要的正则表达式 - if not re.match(allowed_type_pattern, v): - raise ValueError(f'Invalid type value "{v}". Must match pattern {allowed_type_pattern}.') - return v - - -class GetChunkRequst(DictionaryBaseModel): - content: str=Field(default='') - kb_sn: Optional[str] = None - topk: int = Field(default=5, ge=0, le=10) - - -class SwitchChunkRequest(DictionaryBaseModel): - ids: List[uuid.UUID] # 支持批量操作 - enabled: bool # True启用, False未启用 - -class GetChunkRequest(DictionaryBaseModel): - content: str - kb_sn: Optional[str]=None - topk: int=Field(default=5, ge=0, le=20) - retrieval_mode: str=Field(default='chunk', choices=['chunk', 'full_text']) -class AddUserRequest(BaseModel): - name: str = Field(..., min_length=1, max_length=10, description="用户名,长度在1到10个字符") - email: Optional[str] = Field(None, min_length=5, max_length=30, - pattern='^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', description="邮箱,长度在5到30个字符") - account: str = Field(..., min_length=5, max_length=20, - pattern="^[a-z0-9]+$", description="账号,由小写字母和数字组成,长度在5到20个字符") - passwd: str = Field(..., min_length=63, max_length=65, description="密码的哈希") - - -class UpdateUserRequest(DictionaryBaseModel): - name: Optional[str] = None - account: Optional[str] = None - passwd: Optional[str] = None - role: Optional[str] = None - status: Optional[str] = None - language: Optional[str] = None - - -class UpdateModelRequest(DictionaryBaseModel): - id: Optional[uuid.UUID] = Field(None) - model_name: Optional[str] = Field(None, min_length=1, max_length=128) - openai_api_base: Optional[str] = Field(None, min_length=1, max_length=128) - openai_api_key: Optional[str] = Field(None, min_length=1, max_length=128) - max_tokens: Optional[int] = Field(None, ge=1024, le=8192) - is_online: bool = Field(default=True) - - -class QueryRequest(BaseModel): - question: str - kb_sn: Optional[uuid.UUID] = None - document_ids: Optional[List[uuid.UUID]] = None - top_k: int = Field(5, ge=0, le=10) - fetch_source: bool = False - history: Optional[List] 
= [] diff --git a/data_chain/models/constant.py b/data_chain/models/constant.py deleted file mode 100644 index 6720a6f..0000000 --- a/data_chain/models/constant.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - -class BaseConstant(): - @classmethod - def get_all_values(cls): - return [value for name, value in cls.__dict__.items() - if not name.startswith("__") and isinstance(value, str)] -class OssConstant(BaseConstant): - IMPORT_FILE_SAVE_FOLDER = "./stash" - EXPORT_FILE_SAVE_FOLDER = "./export" - UPLOAD_DOCUMENT_SAVE_FOLDER = "./document" - ZIP_FILE_SAVE_FOLDER = "./zip" - PARSER_SAVE_FOLDER = "./parser" - - MINIO_BUCKET_DOCUMENT = "document" - MINIO_BUCKET_KNOWLEDGEBASE = "knowledgebase" - MINIO_BUCKET_PICTURE = "picture" - -class DocumentEmbeddingConstant(BaseConstant): - DOCUMENT_EMBEDDING_RUN = 'run' - DOCUMENT_EMBEDDING_CANCEL = 'cancel' - - DOCUMENT_EMBEDDING_STATUS_PENDING = "pending" - DOCUMENT_EMBEDDING_STATUS_RUNNING = "running" - -class DocumentStatusEnum(BaseConstant): - PENDIND='pending' - RUNNING='running' - DELETED='deleted' - -class TemporaryDocumentStatusEnum(BaseConstant): - EXIST='exist' - DELETED='deleted' -class TaskConstant(BaseConstant): - TASK_REDIS_QUEUE_KEY = "TASK_QUEUE" - - TASK_STATUS_PENDING = "pending" - TASK_STATUS_SUCCESS = "success" - TASK_STATUS_FAILED = "failed" - TASK_STATUS_RUNNING = "running" - TASK_STATUS_CANCELED = "canceled" - TASK_STATUS_DELETED = "deleted" - - IMPORT_KNOWLEDGE_BASE = "import_knowledge_base" - EXPORT_KNOWLEDGE_BASE = "export_knowledge_base" - PARSE_DOCUMENT = "parse_document" - PARSE_TEMPORARY_DOCUMENT = "parse_temporary_document" - -class KnowledgeStatusEnum(BaseConstant): - IMPORTING = "importing" - EXPROTING = "exporting" - IDLE = 'idle' - DELETE = 'delete' - -class TaskActionEnum(BaseConstant): - CANCEL = "cancel" - RESTART = "restart" - DELETE = "delete" - -class KnowledgeLanguageEnum(BaseConstant): - ZH = "简体中文" - EN = "English" - 
-class EmbeddingModelEnum(BaseConstant): - BGE_LARGE_ZH = "bge_large_zh" - BGE_LARGE_EN = "bge_large_en" - -class ParseMethodEnum(BaseConstant): - GENERAL = "general" - OCR = "ocr" - ENHANCED = "enhanced" - -class ParseExtensionEnum(BaseConstant): - PDF = ".pdf" - DOCX = ".docx" - DOC = ".doc" - TXT = ".txt" - XLSX = ".xlsx" - HTML = ".html" - MD = ".md" - PPTX = ".pptx" - -class ChunkRelevance(BaseConstant): - IRRELEVANT = 1 - WEAKLY_RELEVANT = 2 - RELEVANT_BUT_LACKS_PREVIOUS_CONTEXT = 3 - RELEVANT_BUT_LACKS_FOLLOWING_CONTEXT = 4 - RELEVANT_BUT_LACKS_BOTH_CONTEXTS = 5 - RELEVANT_AND_COMPLETE = 6 - -default_document_type_id = '00000000-0000-0000-0000-000000000000' -embedding_model_out_dimensions = { - 'bge_large_zh': 1024, - 'bge_large_en': 1024 -} \ No newline at end of file diff --git a/data_chain/models/service.py b/data_chain/models/service.py deleted file mode 100644 index b4a5da4..0000000 --- a/data_chain/models/service.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-# TODO: 给其中某些属性加上参数约束, 例如page或者count之类的 -import datetime -import uuid -from typing import Dict, List, Optional - -from pydantic import BaseModel - - -class DictionaryBaseModelDTO(BaseModel): - def keys(self): - return [key for key in self.__dict__ if getattr(self, key) is not None] - - def __getitem__(self, item): - return getattr(self, item) - - -class DocumentTypeDTO(DictionaryBaseModelDTO): - id: uuid.UUID - type: str - - -class TaskReportDTO(DictionaryBaseModelDTO): - id: uuid.UUID - message: str - current_stage: int - stage_cnt: int - create_time: str - - -class TaskDTO(DictionaryBaseModelDTO): - id: uuid.UUID - type: str - retry: int - status: str - reports: List[TaskReportDTO] = [] - create_time: str - - -class KnowledgeBaseDTO(DictionaryBaseModelDTO): - id: str - name: str - language: str - description: str - embedding_model: str - default_parser_method: str - default_chunk_size: int - document_count: int = 0 - document_size: int = 0 - document_type_list: List[DocumentTypeDTO] - task: Optional[TaskDTO] = None - status: str - created_time: str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') - - -class DocumentDTO(DictionaryBaseModelDTO): - id: str - name: str - extension: str - document_type: DocumentTypeDTO - chunk_size: int - status: str - enabled: bool - created_time: str - task: Optional[TaskDTO] = None - parser_method: str - - -class TemporaryDocumentDTO(DictionaryBaseModelDTO): - id: uuid.UUID - status: str - - -class ChunkDTO(DictionaryBaseModelDTO): - id: str - text: str - enabled: bool - type: str - - -class ModelDTO(DictionaryBaseModelDTO): - id: Optional[str] = None - model_name: Optional[str] = None - model_type: Optional[str] = None - openai_api_base: Optional[str] = None - openai_api_key: Optional[str] = None - max_tokens: Optional[int] = None - is_online: Optional[bool] = None diff --git a/data_chain/parser/handler/base_parser.py b/data_chain/parser/handler/base_parser.py index c4112e1..113675c 100644 --- 
a/data_chain/parser/handler/base_parser.py +++ b/data_chain/parser/handler/base_parser.py @@ -1,502 +1,46 @@ -import json -import os -import uuid -import json - -import numpy as np - -import pptx.table -from data_chain.logger.logger import logger as logging -from pandas import DataFrame -from docx.table import Table as DocxTable -import pptx -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.metrics.pairwise import cosine_similarity -from data_chain.manager.document_manager import DocumentManager -from data_chain.manager.model_manager import ModelManager -from data_chain.parser.tools.split import split_tools -from data_chain.models.constant import OssConstant -from data_chain.apps.base.model.llm import LLM -from data_chain.apps.base.security.security import Security +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType from data_chain.logger.logger import logger as logging -from data_chain.models.constant import OssConstant -from data_chain.parser.tools.split import split_tools - -# TODO chunk和chunk_link可以封装成类 - -class BaseService: - - def __init__(self): - self.vectorizer = None - self.llm_max_tokens = None - self.llm = None - self.chunk_tokens = None - self.ocr_tool = None - - async def init_service(self, llm_entity, chunk_tokens, parser_method): - self.parser_method = parser_method - try: - if llm_entity is not None: - self.llm = LLM( - model_name=llm_entity.model_name, - openai_api_base=llm_entity.openai_api_base, - openai_api_key=Security.decrypt( - llm_entity.encrypted_openai_api_key, - json.loads(llm_entity.encrypted_config) - ), - max_tokens=llm_entity.max_tokens, - ) - self.llm_max_tokens = llm_entity.max_tokens - except Exception as e: - logging.error(f"INIT LLM error failed due to {e}") - self.chunk_tokens = chunk_tokens - self.vectorizer = TfidfVectorizer() +class BaseParser: @staticmethod - def get_uuid(): - """ - 获取uuid - 返回: 
- 生成的uuid - """ - return uuid.uuid4() - - def check_similarity(self, text1, text2): - """ - TODO :获取段落相似度,具体数值待微调 - """ - # 将文本转换为TF-IDF向量 - try: - if len(text1) < len(text2)*10: - tfidf_matrix = self.vectorizer.fit_transform([text1, text2]) - - # 计算余弦相似度 - cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2]) - if cosine_sim > 0.85: - return True - except Exception as e: - logging.error(f'Check_similarity error due to: {e}') - return False - - def merge_texts(self, texts): - now_len = 0 - now_text = "" - new_texts = [] - for text in texts: - if text['type'] == 'para': - if text['text'] == "": - continue - token_len = split_tools.get_tokens(text) - if now_len + token_len < max(self.chunk_tokens // 2, 128) or ( - now_len + token_len < self.chunk_tokens and self.check_similarity(now_text, text['text'])): - now_text += text['text'] + '\n' - now_len += token_len - else: - new_texts.append({'text': now_text, 'type': 'para'}) - now_text = text['text'] + '\n' - now_len = token_len - else: - if now_len: - new_texts.append({'text': now_text, 'type': 'para'}) - now_text = "" - now_len = 0 - new_texts.append(text) - if now_len: - new_texts.append({'text': now_text, 'type': 'para'}) - return new_texts + def find_worker_class(worker_name): + subclasses = BaseParser.__subclasses__() + for subclass in subclasses: + if subclass.name == worker_name: + return subclass + return None @staticmethod - def split_sentences(text, TOKENS=1024): - """ - 分句,不超过Tokens数量 - """ - try: - words = split_tools.split_words(text) - current_length = 0 - current_sentence = "" - result = [] - for word in words: - current_sentence = current_sentence + word - current_length = current_length + 1 - if current_length >= TOKENS: - result.append(current_sentence) - current_sentence = "" - current_length = 0 - result.append(current_sentence) - return result - except Exception as e: - logging.error(f"split sentences error as {e}") - return [] - - def split_table(self, table): - """ - 按照行分表 - """ - 
- if table is None: - return [] - result = [] - new_table = [] - cell_num = 1 - try: - if isinstance(table, DataFrame): - for index, row in table.iterrows(): - row_string_list = [s.replace('|', '||') for s in row.astype(str).tolist()] - cell_num = max(cell_num, len(row_string_list)) - new_table.append(row_string_list) - elif isinstance(table, DocxTable) or isinstance(table, pptx.table.Table): - if table.rows: - for row in table.rows: - row_string_list = [s.replace('|', '||') for s in (cell.text.strip() for cell in row.cells)] - cell_num = max(cell_num, len(row_string_list)) - new_table.append(row_string_list) - else: - logging.error(f"table type Error as{type(table)}") - return [] - except Exception as e: - logging.error(f"split tables error as{e}") - return [] - - max_tokens = (self.chunk_tokens - cell_num) // cell_num - for row in new_table: - new_line = [] - max_len = 0 - for cell in row: - cell = self.split_sentences(cell, max_tokens) - if not cell: - cell = [''] - new_line.append(cell) - max_len = max(max_len, len(cell)) - for i in range(max_len): - row_text = ' | '.join([cell[i] if len(cell) > i else ' ' for cell in new_line]) - row_text = row_text.replace('\n', '\\n') - result.append(row_text) - - return result - - async def ocr_from_images_in_lines(self, lines): - # 获取图像相邻文本 - last_para_pre = "" - for i in range(len(lines)): - line = lines[i] - if line['type'] == 'image': - lines[i]['related_text'] = last_para_pre - elif line['type'] == 'para': - last_para_pre = line['text'] - elif line['type'] == 'table': - pass - last_para_bac = "" - for i in range(len(lines) - 1, -1, -1): - line = lines[i] - if line['type'] == 'image': - lines[i]['related_text'] += last_para_bac - elif line['type'] == 'para': - last_para_bac = line['text'] - elif line['type'] == 'table': - pass - for line in lines: - if line['type'] == 'image': - line['text'] = await self.ocr_tool.image_to_text(line['image'], image_related_text=line['related_text']) - return lines - - async def 
change_lines(self, lines): - """ - 修整处理lines,根据不同的类型(图像、段落、表格)处理每一行,并根据method参数决定处理方式。 - - 参数: - - lines (list): 需要处理的行列表,每行包含内容和类型。 - 返回: - - tuple: 包含处理后的句子列表和图像列表的元组。 - """ - new_lines = [] - images = [] - last_para_id = None - for line in lines: - if line['type'] == 'image': - # 处理图像 - image_id = self.get_uuid() - image = line['image'] - image_bytes = image.tobytes() - image_extension = line['extension'] - await self.insert_image_to_tmp_folder(image_bytes, image_id, image_extension) - if self.parser_method in ['ocr', 'enhanced']: - # 将图片关联到图片的描述chunk上 - chunk_id = self.get_uuid() - new_lines.append({'id': chunk_id, - 'type': 'image'}) - new_lines[-1]['image'] = np.array(image) - images.append({ - 'id': image_id, - 'chunk_id': chunk_id, - 'extension': image_extension, - }) - else: - # 将图片关联到上一个段落chunk上 - images.append({ - 'id': image_id, - 'chunk_id': last_para_id, - 'extension': image_extension, - }) - - elif line['type'] == 'para': - # 处理段落 - new_lines.append({'id': self.get_uuid(), - 'text': line['text'], - 'type': line['type']}) - last_para_id = new_lines[-1]['id'] - - elif line["type"] == 'table': - # 处理表格 - new_lines.append({'id': self.get_uuid(), - 'text': line['text'], - 'type': line['type']}) - return new_lines, images - - def package_to_chunk(self, **kwargs): - """ - 整合成chunk - - 参数: - - id (str, optional): 目标uuid,默认生成一个新的UUID - - text (str, optional): 目标内容,默认为空字符串 - - tokens (int, optional): 词数,默认为0 - - status (str, optional): 状态,默认为空字符串 - - type_from (str, optional): 来源类型,默认为general - - type_big (str, optional): 大类型,默认为para - - type_small (str, optional): 小类型,默认为line - - type_attr (str, optional): 属性类型,默认为normal - - link_to (str, optional): 链接目标uuid,默认为空字符串 - - offset_in_document (int, optional): 在文档中的偏移量,默认为0 - - 返回: - - dict: 包含chunk信息的字典 - """ - # TODO:可以进行封装 - default_values = { - 'id': self.get_uuid(), - 'text': "", - 'tokens': 0, - 'status': "", - 'type_from': 'general', - 'type_big': 'para', - 'type_small': 'line', - 'type_attr': 'normal', - 
'link_to': "", - 'enabled': True, - 'local_offset': 0, - 'global_offset': 0, - } + def image_related_node_in_link_nodes(nodes: list[ParseNode]) -> None: + text_node = None + for i in range(len(nodes)): + if nodes[i].type == ChunkType.TEXT: + text_node = nodes[i] + elif nodes[i].type == ChunkType.IMAGE: + if text_node: + nodes[i].link_nodes.append(text_node) + text_node = None + for i in range(len(nodes)-1, 0, -1): + if nodes[i].type == ChunkType.TEXT: + text_node = nodes[i] + elif nodes[i].type == ChunkType.IMAGE: + if text_node: + nodes[i].link_nodes.append(text_node) - # 更新默认值为传入的参数值 - for key, value in kwargs.items(): - if key in default_values: - default_values[key] = value - chunk_type = f"{default_values['type_from']}.{default_values['type_big']}." \ - f"{default_values['type_small']}.{default_values['type_attr']}" - - # 构建chunk字典 - chunk = { - 'id': default_values['id'], - 'text': default_values['text'], - 'type': chunk_type, - 'tokens': default_values['tokens'], - 'global_offset': default_values['global_offset'], - 'local_offset': default_values['local_offset'], - 'enabled': default_values['enabled'], - 'status': default_values['status'], - 'link_to': default_values['link_to'], - } - - return chunk - - def package_to_link(self, chunk_a, chunk_b, **kwargs): - """ - 打包link - 参数: - - chunk_a (str): 出发chunk的id - - chunk_b (str): 目标chunk的id - - is_global (str, optional): link为全局边或者局部边,默认为local - - structure (str, optional): link的小类,表示link属于line/tree/map,默认为line - - model (str, optional): link的模型,默认为pre - - jump (str or int, optional): 跳转值,默认为0 - - 返回: - - dict: 包含link信息的字典 - """ - - default_values = { - 'is_global': 'local', - 'structure': 'line', - 'model': 'pre', - 'jump': 0 - } - - # 更新默认值为传入的参数值 - for key, value in kwargs.items(): - if key in default_values: - default_values[key] = value - - # 确保 jump 是字符串类型 - jump = str(default_values['jump']) - - # 构建 link 字典 - link_type = (f"{default_values['is_global']}.{default_values['model']}." 
- f"{default_values['structure']}.{jump}") - link = { - 'id': self.get_uuid(), - 'chunk_a': chunk_a, - 'chunk_b': chunk_b, - 'type': link_type, - } - - return link - - def build_chunks_by_lines(self, sentences): - """ - chunks 连接函数 - sentences中需要type和text字段 - """ - sentences = self.merge_texts(sentences) - chunks = [] - local_count = 0 - para_count = 0 - now_type = 'None' - last_para = None - last_local = None - chunks_para = [] - local_offset = 0 - global_offset = 0 - for part in sentences: - global_offset += 1 - local_offset += 1 - if now_type != part['type']: - last_local = None - local_offset = 0 - if part['type'] == now_type: - local_count += 1 - type_attr = 'normal' - else: - type_attr = 'head' - local_count = 0 - if part['type'] == 'para': - link_to = last_para - para_count += 1 - else: - link_to = last_local - - now_type = part['type'] - if 'id' not in part: - part['id'] = self.get_uuid() - chunk = self.package_to_chunk(id=part["id"], text=part["text"], tokens=split_tools.get_tokens(part["text"]), - link_to=link_to, status="", type_from="general", type_big=part["type"], - type_small="line", type_attr=type_attr, global_offset=global_offset, - local_offset=local_offset, ) - last_local = chunk['id'] - chunks.append(chunk) - if now_type == 'para': - last_para = chunk['id'] - chunks_para.append(chunk) - return chunks - - def build_chunks_and_links_by_tree(self, tree: dict): - """ - chunks 连接函数 - tree为dict表示的树结构, - """ - chunks = [] - - def get_edges(node, parent_id=None, dfs_order=0): - chunk = self.package_to_chunk(text=node["text"], tokens=split_tools.get_tokens(tree["text"]), status="", - type_big=node["type"], type_small='tree', type_attr=node['type_attr'], - global_offset=dfs_order, link_to=parent_id, ) - node['id'] = chunk['id'] - chunks.append(chunk) - - # 如果当前节点有子节点,则遍历每个子节点 - if 'children' in node and node['children']: - for child in node['children']: - # 递归处理子节点 - dfs_order = get_edges(child, node['id'], dfs_order+1) - return dfs_order - - 
get_edges(tree) - - chunk_links = [] - chunk_links.extend(self.edge_link(chunks, 'global', 'tree')) - return chunks, chunk_links - - def build_chunk_links_by_line(self, chunks): - """ - 线性分割chunks并构建上下文关系 - """ - chunk_links = [] - chunks_para = [] - tmp_chunks = [] - for chunk in chunks: - if chunk['type'] == 'para': - if tmp_chunks is not None and len(tmp_chunks) > 0: - chunk_links.extend(self.edge_link(tmp_chunks, 'local', 'line')) - tmp_chunks = [] - else: - tmp_chunks.append(chunk) - chunk_links.extend(self.edge_link(chunks_para, 'local', 'line')) - chunk_links.extend(self.edge_link(chunks, 'global', 'line')) - return chunk_links - - def edge_link(self, chunks, is_global, structure, **kwargs): - """ - 根据给定的块列表构建边缘链接。 - 该函数通过遍历每个块,并为每个块与其链接的目标块创建双向链接数据。 - 然后,根据这些链接数据生成链接对象列表。 - - """ - links = [] - links_data = [] - for chunk in chunks: - links_data.append({ - 'chunk_a': chunk['id'], - 'chunk_b': chunk['link_to'], - 'is_global': is_global, - 'structure': structure, - 'model': 'next', - 'jump': 0 - }) - links_data.append({ - 'chunk_a': chunk['link_to'], - 'chunk_b': chunk['id'], - 'is_global': is_global, - 'structure': structure, - 'model': 'pre', - 'jump': 0 - }) - - for data in links_data: - if data['chunk_a'] is None or data['chunk_b'] is None: - continue - links.append( - self.package_to_link(chunk_a=data['chunk_a'], chunk_b=data['chunk_b'], is_global=data['is_global'], - structure=data['structure'], model=data['model'], jump=data['jump'], )) - return links - - async def insert_image_to_tmp_folder(self, image_bytes, image_id, image_extension): - """ - 插入图像字节流到临时文件夹中(用于插入到minIO) - 参数: - - image_bytes: 图像字节流(可以是多个图像) - - image_id: 用于保存图像文件的id - """ - output_dir = None - try: - if not isinstance(type(image_bytes), list): - image_bytes = [image_bytes] - for image in image_bytes: - output_dir = os.path.join(OssConstant.PARSER_SAVE_FOLDER, str(image_id)) - os.makedirs(output_dir, exist_ok=True) - output_path = os.path.join(output_dir, 
str(image_id)+'.'+image_extension) - with open(output_path, 'wb') as f: - f.write(image) - return True - except Exception as e: - logging.error(f'Insert images {image_id} error: {e}') - return False + @staticmethod + async def parser(parser_method: str, file_path: str) -> ParseResult: + """ + 解析器 + :param parser_method: 解析器方法 + :param file_path: 文件路径 + :return: 解析结果 + """ + parser_class = BaseParser.find_worker_class(parser_method) + if parser_class: + return await parser_class.parser(file_path) + else: + err = f"[BaseParser] 解析器不存在,parser_method: {parser_method}" + logging.exception(err) + raise err diff --git a/data_chain/parser/handler/doc_parser.py b/data_chain/parser/handler/doc_parser.py index 9e89b9b..03c9195 100644 --- a/data_chain/parser/handler/doc_parser.py +++ b/data_chain/parser/handler/doc_parser.py @@ -1,38 +1,47 @@ -from data_chain.logger.logger import logger as logging +import asyncio +from bs4 import BeautifulSoup +import markdown +import os from tika import parser - -from data_chain.parser.handler.base_parser import BaseService - +import requests +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging -class DocService(BaseService): - def extract_paragraph(self, paragraph): - sentences = self.split_sentences(paragraph, self.chunk_tokens) - results = [] - for sentence in sentences: - results.append({ - "type": "para", - "text": sentence, - }) - return results +class DocParser(BaseParser): + name = 'doc' @staticmethod - def open_file(file_path): - return open(file_path, 'rb') - async def parser(self, file_path): - binary = self.open_file(file_path) + binary = open(file_path, 'rb') try: js = parser.from_buffer(binary) + if js.get('status') != 200: + err = "tika服务异常" + logging.exception("[DocParser] %s", err) + raise 
Exception(err) except Exception as e: - logging.error(f"Error opening file {file_path} :{e}") + err = "tika服务异常" + logging.exception("[DocParser] %s", err) raise e - content=js.get('content','') - paragraphs = content.split('\n') - sentences = [] - for paragraph in paragraphs: - sentences.extend(self.extract_paragraph(paragraph)) - chunks = self.build_chunks_by_lines(sentences) - chunk_links = self.build_chunk_links_by_line(chunks) - return chunks, chunk_links, [] - + try: + content = js.get('content', '') + except Exception as e: + err = "tika服务返回的内容异常" + logging.exception("[DocParser] %s", err) + raise e + parse_node = ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=content, + type=ChunkType.TEXT, + link_nodes=[] + ) + return ParseResult( + parse_topology_type=DocParseRelutTopology.LIST, + nodes=[parse_node] + ) diff --git a/data_chain/parser/handler/docx_parser.py b/data_chain/parser/handler/docx_parser.py index accbb45..728aa6e 100644 --- a/data_chain/parser/handler/docx_parser.py +++ b/data_chain/parser/handler/docx_parser.py @@ -1,7 +1,4 @@ import docx -from io import BytesIO -from PIL import Image -import numpy as np from docx.document import Document from docx.text.paragraph import Paragraph from docx.parts.image import ImagePart @@ -9,25 +6,29 @@ from docx.table import _Cell, Table from docx.oxml.table import CT_Tbl from docx.oxml.text.paragraph import CT_P from docx.oxml.shape import CT_Picture +from io import BytesIO +from PIL import Image +import numpy as np import mimetypes -from data_chain.parser.handler.base_parser import BaseService -from data_chain.parser.tools.ocr import BaseOCR +from data_chain.logger.logger import logger as logging +import asyncio +from bs4 import BeautifulSoup +import markdown +import os +import requests +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from 
data_chain.apps.base.zip_handler import ZipHandler +from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging -class DocxService(BaseService): - def __init__(self): - super().__init__() - - def open_file(self, file_path): - try: - doc = docx.Document(file_path) - return doc - except Exception as e: - logging.error(f"Opening docx file {file_path} failed due to:{e}") - raise e +class DocxParser(BaseParser): + name = 'docx' - def is_image(self, graph: Paragraph, doc: Document): + @staticmethod + async def is_image(graph: Paragraph, doc: Document) -> bool: images = graph._element.xpath('.//pic:pic') for image in images: for img_id in image.xpath('.//a:blip/@r:embed'): @@ -36,8 +37,9 @@ class DocxService(BaseService): return True return False + @staticmethod # 获取run中的所有图片 - def get_imageparts_from_run(self, run, doc: Document): + async def get_imageparts_from_run(run, doc: Document) -> list[ImagePart]: image_parts = [] drawings = run._r.xpath('.//w:drawing') # 获取所有图片 for drawing in drawings: @@ -47,119 +49,155 @@ class DocxService(BaseService): image_parts.append(part) return image_parts + @staticmethod + async def extract_table_to_array(table: Table) -> list[list[str]]: + table_data = [] + for row in table.rows: + row_data = [] + for cell in row.cells: + cell_text = ''.join([p.text for p in cell.paragraphs]) + row_data.append(cell_text) + table_data.append(row_data) + return table_data + + @staticmethod # 遍历文档中的块级元素 - def get_lines(self, parent): + async def docx_to_parse_nodes(parent) -> list[ParseNode]: if isinstance(parent, Document): parent_elm = parent.element.body elif isinstance(parent, _Cell): parent_elm = parent._tc else: - logging.error("Unsupported parent type: %s", type(parent)) - return [] - lines = [] + err = "不支持的父元素类型" + logging.exception("[DocxParser] %s", err) + raise err + + nodes = [] for child in parent_elm.iterchildren(): if isinstance(child, CT_P): paragraph = Paragraph(child, parent) - if 
self.is_image(paragraph, parent): + if (await DocxParser.is_image(paragraph, parent)): text_part = '' run_index = 0 runs = paragraph.runs while run_index < len(runs): run = runs[run_index] - image_parts = self.get_imageparts_from_run(run, parent) + image_parts = await DocxParser.get_imageparts_from_run(run, parent) if image_parts: if text_part: - lines.append( - { - 'text': text_part, - 'type': 'para' - } + nodes.append + ( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=text_part, + type=ChunkType.TEXT, + link_nodes=[] + ) ) text_part = '' for image_part in image_parts: try: image_blob = image_part.image.blob - content_type = image_part.content_type except Exception as e: - logging.error(f"Get Image blob and part failed due to :{e}") + err = "获取图片blob和content type失败" + logging.exception("[DocxParser] %s", err) continue - extension = mimetypes.guess_extension(content_type).replace('.', '') - lines.append( - { - 'image': Image.open(BytesIO(image_blob)), - 'extension': extension, - 'type': 'image' - } + nodes.append( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=image_blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) ) else: text_part += run.text run_index += 1 if text_part: - lines.append( - { - 'text': text_part, - 'type': 'para' - } + nodes.append + ( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=text_part, + type=ChunkType.TEXT, + link_nodes=[] + ) ) else: - lines.append( - { - 'text': paragraph.text, - 'type': 'para' - } + nodes.append + ( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=paragraph.text, + type=ChunkType.TEXT, + link_nodes=[] + ) ) elif isinstance(child, CT_Tbl): table = Table(child, parent) - rows = self.split_table(table) - for row in rows: - lines.append( - { - 'text': row, - 'type': 'table' - } - ) + table_array = await 
DocxParser.extract_table_to_array(table) + for row in table_array: + for cell in row: + if cell: + nodes.append + ( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=cell, + type=ChunkType.TEXT, + link_nodes=[] + ) + ) elif isinstance(child, CT_Picture): img_id = child.xpath('.//a:blip/@r:embed')[0] part = parent.part.related_parts[img_id] if isinstance(part, ImagePart): try: image_blob = part.image.blob - content_type = part.content_type except Exception as e: - logging.error(f'Get image blob and content type failed due to: {e}') + err = "获取图片blob和content type失败" + logging.exception("[DocxParser] %s", err) continue - extension = mimetypes.guess_extension(content_type).replace('.', '') - lines.append( - { - 'image': Image.open(BytesIO(image_blob)), - 'extension': extension, - 'type': 'image' - } + nodes.append + ( + ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=image_blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) ) - return lines - - async def parser(self, file_path): - """ - 解析文件并提取其中的文本和图像信息。 + return nodes - 参数: - - file_path (str): 文件的路径。 - - 返回: - - tuple: 包含分块的文本信息、分块间的链接信息和提取的图像信息的元组。 - 如果文件无法打开或解析失败,则返回 None。 - """ - doc = self.open_file(file_path) + @staticmethod + async def parser(file_path: str) -> ParseResult: + doc = docx.Document(file_path) if not doc: - return None - if self.parser_method != "general": - self.ocr_tool = BaseOCR(llm=self.llm, method=self.parser_method) - lines = self.get_lines(doc) + err = "无法打开docx文件" + logging.exception("[DocxParser] %s", err) + raise err + nodes = await DocxParser.docx_to_parse_nodes(doc) + DocxParser.image_related_node_in_link_nodes(nodes) + parse_result = ParseResult( + parse_topology_type=DocParseRelutTopology.LIST, + nodes=nodes + ) + return parse_result + - lines, images = await self.change_lines(lines) - lines = await self.ocr_from_images_in_lines(lines) - chunks = self.build_chunks_by_lines(lines) - 
chunk_links = self.build_chunk_links_by_line(chunks) - return chunks, chunk_links, images +nodes = asyncio.run(DocxParser.parser("通算融合算子收益分析.docx")) +print(nodes) diff --git a/data_chain/parser/handler/html_parser.py b/data_chain/parser/handler/html_parser.py index f2ec567..a71b19d 100644 --- a/data_chain/parser/handler/html_parser.py +++ b/data_chain/parser/handler/html_parser.py @@ -1,56 +1,192 @@ -from data_chain.logger.logger import logger as logging +import asyncio from bs4 import BeautifulSoup -from data_chain.parser.handler.base_parser import BaseService +import markdown +import os +import requests +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging + + +class HTMLParser(BaseParser): + name = 'html' + + @staticmethod + async def extract_table_to_array(table_html: str) -> list[list[str]]: + soup = BeautifulSoup(table_html, 'html.parser') + rows = soup.find_all('tr') + table_data = [] + for row in rows: + cells = row.find_all(['th', 'td']) + row_data = [cell.get_text(strip=True) for cell in cells] + if row_data: + table_data.append(row_data) + return table_data + @staticmethod + async def get_image_blob(img_src: str) -> bytes: + if img_src.startswith(('http://', 'https://')): + try: + response = requests.get(img_src) + response.raise_for_status() + return response.content + except requests.RequestException as e: + warining = f"[MdZipParser] 图片下载失败 {e}" + logging.warning(warining) + return None + else: + return None + + @staticmethod + async def build_subtree(html: str, current_level: int = 0) -> list[ParseNode]: + soup = BeautifulSoup(html, 'html.parser') + soup_elements = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'code', 'p', 'img', 'table', 'a', 'div']) + subtree = [] + while soup_elements: + element = 
soup_elements.pop(0) + if element.name == 'div': + # 去掉 div 标签,直接处理内部元素 + inner_html = ''.join(str(child) for child in element.children) + child_subtree = await HTMLParser.build_subtree(inner_html, current_level+1) + subtree.extend(child_subtree) + parse_topology_type = ChunkParseTopology.TREENORMAL if len( + child_subtree) else ChunkParseTopology.TREELEAF + node = ParseNode( + id=uuid.uuid4(), + title='', + lv=current_level, + parse_topology_type=parse_topology_type, + content="", + type=ChunkType.TEXT, + link_nodes=child_subtree + ) + subtree.append(node) + elif element.name.startswith('h'): + level = int(element.name[1:]) + title = element.get_text() + if level > current_level: + sub_elements = [] + while soup_elements: + next_element = soup_elements[0] + next_level = int(next_element.name[1:]) if next_element.name.startswith('h') else float('inf') + if next_level <= current_level: + break + sub_elements.append(soup_elements.pop(0)) + sub_html = ''.join(str(sub_el) for sub_el in sub_elements) + child_subtree = await HTMLParser.build_subtree(sub_html, level) + parse_topology_type = ChunkParseTopology.TREENORMAL if len( + child_subtree) else ChunkParseTopology.TREELEAF + node = ParseNode( + id=uuid.uuid4(), + title=title, + lv=level, + parse_topology_type=parse_topology_type, + content="", + type=ChunkType.TEXT, + link_nodes=child_subtree + ) + subtree.append(node) + elif level == current_level: + continue + else: + soup_elements.insert(0, element) + break + elif element.name == 'code': + code_text = element.get_text().strip() + node = ParseNode( + id=uuid.uuid4(), + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=code_text, + type=ChunkType.CODE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'p': + para_text = element.get_text().strip() + if para_text: + node = ParseNode( + id=uuid.uuid4(), + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=para_text, + type=ChunkType.TEXT, + 
link_nodes=[] + ) + subtree.append(node) + elif element.name == 'img': + img_src = element.get('src') + img_blob = await HTMLParser.get_image_blob(img_src) + if img_blob: + node = ParseNode( + id=uuid.uuid4(), + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=img_blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'table': + table_array = await HTMLParser.extract_table_to_array(str(element)) + for row in table_array: + node = ParseNode( + id=uuid.uuid4(), + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=row, + type=ChunkType.TABLE, + link_nodes=[] + ) + elif element.name == 'a': + link_text = element.get_text().strip() + link_href = element.get('href') + if link_text and link_href: + node = ParseNode( + id=uuid.uuid4(), + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=link_href, + type=ChunkType.LINK, + link_nodes=[] + ) + subtree.append(node) + return subtree + @staticmethod + async def flatten_tree(root: ParseNode, nodes: list[ParseNode]) -> None: + nodes.append(root) + for child in root.link_nodes: + await HTMLParser.flatten_tree(child, nodes) -class HtmlService(BaseService): - # 读取 HTML 文件 + @staticmethod + async def html_to_tree(html: str) -> ParseNode: + root = ParseNode( + id=uuid.uuid4(), + title="", + lv=0, + parse_topology_type=ChunkParseTopology.TREEROOT, + content="", + type=ChunkType.TEXT, + link_nodes=[] + ) + root.link_nodes = await HTMLParser.build_subtree(html, 0) + nodes = [] + await HTMLParser.flatten_tree(root, nodes) + return nodes @staticmethod - def open_file(file_path): - try: - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: - html_content = file.read() - return html_content - except Exception as e: - logging.error(f"Error opening file {file_path} :{e}") - raise e - - def element_to_dict(self, element): - node_dict = { - "tag": element.name, # 当前节点的标签名 - "attributes": element.attrs 
if element.attrs else '', # 标签的属性(如果有) - "text": element.get_text(strip=True) if element.string else '', # 标签内的文字 - "children": [], # 子节点列表 - "id": self.get_uuid(), - "type": "para", - "type_attr": 'leaf', - } - - # 处理图片 - if element.name == "img": - node_dict["img"] = element.get('src', None) - node_dict['type'] = 'img' - # 处理列表 - elif element.name in ["ul", "ol"]: - node_dict["list"] = [li.get_text(strip=True) for li in element.find_all('li')] - node_dict['type'] = 'table' - - # 递归处理子元素 - for child in element.children: - if child.name: # 如果子节点是标签而不是字符串 - node_dict['type_attr'] = 'node' - child_node = self.element_to_dict(child) - node_dict["children"].append(child_node) - - return node_dict - - async def parser(self, file_path): - html_content = self.open_file(file_path) - # 解析 HTML 内容 - soup = BeautifulSoup(html_content, 'lxml') - tree = self.element_to_dict(soup) - chunks, chunk_links = self.build_chunks_and_links_by_tree(tree) - return chunks, chunk_links, [] + async def parse(file_path) -> ParseResult: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: + html = file.read() + print(html) + nodes = await HTMLParser.html_to_tree(html) + return ParseResult( + parse_topology_type=DocParseRelutTopology.TREE, + nodes=nodes + ) + +nodes = asyncio.run(HTMLParser.parse("example.html")) diff --git a/data_chain/parser/handler/json_parser.py b/data_chain/parser/handler/json_parser.py new file mode 100644 index 0000000..f221e3f --- /dev/null +++ b/data_chain/parser/handler/json_parser.py @@ -0,0 +1,34 @@ +import json +import uuid + +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging + + +class JsonParser(BaseParser): + name = 'json' + + @staticmethod + async def parser(file_path: str) -> ParseResult: + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: + content = file.read() + data = 
json.loads(content) + except Exception as e: + err = "读取json文件失败" + logging.exception("[JsonParser] %s", err) + raise e + node = ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=None, + content=data, + type=None, + link_nodes=[] + ) + parse_result = ParseResult( + parse_topology_type=None, + nodes=[node] + ) + return parse_result diff --git a/data_chain/parser/handler/md_parser.py b/data_chain/parser/handler/md_parser.py index 0018df0..f506787 100644 --- a/data_chain/parser/handler/md_parser.py +++ b/data_chain/parser/handler/md_parser.py @@ -1,50 +1,177 @@ +import asyncio +from bs4 import BeautifulSoup +import markdown +import os +import requests +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.apps.base.zip_handler import ZipHandler +from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging -from data_chain.parser.handler.base_parser import BaseService -class MdService(BaseService): +class MdParser(BaseParser): + name = 'md' + @staticmethod + async def extract_table_to_array(table_html: str) -> list[list[str]]: + + soup = BeautifulSoup(table_html, 'html.parser') + + # 获取表格的所有行 + rows = soup.find_all('tr') + + table_data = [] + + for row in rows: + # 获取行中的所有单元格,包括表头()和普通单元格() + cells = row.find_all(['th', 'td']) + + # 提取单元格中的文本,并去除多余的空白字符 + row_data = [cell.get_text(strip=True) for cell in cells] + + if row_data: # 如果该行有数据 + table_data.append(row_data) + + return table_data @staticmethod - def read_md(file_path): - # 打开并读取Markdown文件 - try: - with open(file_path, 'r', encoding='utf-8',errors='ignore') as file: - data = file.read() - return data - except Exception as e: - logging.error(f"Error opening file {file_path} :{e}") - raise e - - # 提取列表分词结果 - def extract_from_md(self, data) -> dict: - md = data - lines = md.split('\n') - results = [] - if len(lines) > 
1: - type = "table" + async def get_image_blob(img_src: str) -> bytes: + if img_src.startswith(('http://', 'https://')): + try: + response = requests.get(img_src) + response.raise_for_status() + return response.content + except requests.RequestException as e: + warining = f"[MdParser] 图片下载失败 {e}" + logging.warning(warining) + return None else: - type = "para" - lines = lines[0] - lines = self.split_sentences(lines, self.chunk_tokens) - for line in lines: - results.append({ - 'type': type, - 'text': line, - }) - return results - - async def parser(self, file_path): - data = self.read_md(file_path) - parts = data.split('\n\n') #分割 - sentences = [] - for part in parts: - sentences.extend(self.extract_from_md(part)) - chunks = self.build_chunks_by_lines(sentences) - chunk_links = self.build_chunk_links_by_line(chunks) - return chunks, chunk_links, [] - - -if __name__ == '__main__': - model = MdService() - chunks, links, images = model.parser('test.md') + return None + + @staticmethod + async def build_subtree(html: str, current_level: int) -> list[ParseNode]: + soup = BeautifulSoup(html, 'html.parser') + soup_elements = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img', 'table', 'pre']) + subtree = [] + while soup_elements: + element = soup_elements.pop(0) + if element.name.startswith('h'): + level = int(element.name[1:]) + title = element.get_text() + if level > current_level: + sub_elements = [] + while soup_elements: + next_element = soup_elements[0] + next_level = int(next_element.name[1:]) if next_element.name.startswith('h') else float('inf') + if next_level <= current_level: + break + sub_elements.append(soup_elements.pop(0)) + sub_html = ''.join(str(sub_el) for sub_el in sub_elements) + child_subtree = await MdParser.build_subtree(sub_html, level) + parse_topology_type = ChunkParseTopology.TREENORMAL if len( + child_subtree) else ChunkParseTopology.TREELEAF + node = ParseNode( + id=uuid.uuid4(), + title=title, + lv=level, + 
parse_topology_type=parse_topology_type, + content="", + type=ChunkType.TEXT, + link_nodes=child_subtree + ) + subtree.append(node) + elif level == current_level: + continue + else: + soup_elements.insert(0, element) + break + elif (element.name == 'p' or element.name == 'pre') and element.find('code'): + code_text = element.find('code').get_text() + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=code_text, + type=ChunkType.CODE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'p': + para_text = element.get_text().strip() + if para_text: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=para_text, + type=ChunkType.TEXT, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'img': + img_src = element.get('src') + img_blob = await MdParser.get_image_blob(img_src) + if img_blob: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=img_blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'table': + table_array = await MdParser.extract_table_to_array(str(element)) + for row in table_array: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=row, + type=ChunkType.TABLE, + link_nodes=[] + ) + subtree.append(node) + + return subtree + + @staticmethod + async def flatten_tree(root: ParseNode, nodes: list[ParseNode]) -> None: + nodes.append(root) + for child in root.link_nodes: + await MdParser.flatten_tree(child, nodes) + + @staticmethod + async def markdown_to_tree(markdown_text: str) -> ParseNode: + html = markdown.markdown(markdown_text, extensions=['tables']) + root = ParseNode( + id=uuid.uuid4(), + title="", + lv=0, + parse_topology_type=ChunkParseTopology.TREEROOT, + content="", + type=ChunkType.TEXT, + link_nodes=[] 
+ ) + root.link_nodes = await MdParser.build_subtree(html, 0) + nodes = [] + await MdParser.flatten_tree(root, nodes) + return nodes + + @staticmethod + async def parse(file_path: str) -> ParseResult: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + markdown_text = f.read() + nodes = await MdParser.markdown_to_tree(markdown_text) + parse_result = ParseResult( + parse_topology_type=DocParseRelutTopology.TREE, + nodes=nodes + ) + return parse_result diff --git a/data_chain/parser/handler/md_zip_parser.py b/data_chain/parser/handler/md_zip_parser.py new file mode 100644 index 0000000..3d03181 --- /dev/null +++ b/data_chain/parser/handler/md_zip_parser.py @@ -0,0 +1,205 @@ +import asyncio +from bs4 import BeautifulSoup +import markdown +import os +import requests +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.apps.base.zip_handler import ZipHandler +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging + + +class MdZipParser(BaseParser): + name = 'zip' + + @staticmethod + async def extract_table_to_array(table_html: str) -> list[list[str]]: + + soup = BeautifulSoup(table_html, 'html.parser') + + # 获取表格的所有行 + rows = soup.find_all('tr') + + table_data = [] + + for row in rows: + # 获取行中的所有单元格,包括表头()和普通单元格() + cells = row.find_all(['th', 'td']) + + # 提取单元格中的文本,并去除多余的空白字符 + row_data = [cell.get_text(strip=True) for cell in cells] + + if row_data: # 如果该行有数据 + table_data.append(row_data) + + return table_data + + @staticmethod + async def get_image_blob(base_dir: str, img_src: str) -> bytes: + if img_src.startswith(('http://', 'https://')): + try: + response = requests.get(img_src) + response.raise_for_status() + return response.content + except requests.RequestException as e: + warining = f"[MdZipParser] 图片下载失败 {e}" + logging.warning(warining) + return 
None + else: + img_path = os.path.join(base_dir, img_src) + if os.path.exists(img_path): + try: + with open(img_path, 'rb') as file: + return file.read() + except Exception as e: + warining = f"[MdZipParser] 图片读取失败 {e}" + logging.warning(warining) + return None + else: + warining = f"[MdZipParser] 图片路径不存在 {img_path}" + logging.warning(warining) + return None + + @staticmethod + async def build_subtree(file_path: str, html: str, current_level: int) -> list[ParseNode]: + soup = BeautifulSoup(html, 'html.parser') + soup_elements = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img', 'table', 'pre']) + subtree = [] + while soup_elements: + element = soup_elements.pop(0) + if element.name.startswith('h'): + level = int(element.name[1:]) + title = element.get_text() + if level > current_level: + sub_elements = [] + while soup_elements: + next_element = soup_elements[0] + next_level = int(next_element.name[1:]) if next_element.name.startswith('h') else float('inf') + if next_level <= current_level: + break + sub_elements.append(soup_elements.pop(0)) + sub_html = ''.join(str(sub_el) for sub_el in sub_elements) + child_subtree = await MdZipParser.build_subtree(file_path, sub_html, level) + parse_topology_type = ChunkParseTopology.TREENORMAL if len( + child_subtree) else ChunkParseTopology.TREELEAF + node = ParseNode( + id=uuid.uuid4(), + title=title, + lv=level, + parse_topology_type=parse_topology_type, + content="", + type=ChunkType.TEXT, + link_nodes=child_subtree + ) + subtree.append(node) + elif level == current_level: + node = ParseNode( + id=uuid.uuid4(), + title=title, + lv=level, + parse_topology_type=ChunkParseTopology.TREENORMAL, + content="", + type=ChunkType.HEADER, + link_nodes=[] + ) + subtree.append(node) + else: + soup_elements.insert(0, element) + break + elif (element.name == 'p' or element.name == 'pre') and element.find('code'): + code_text = element.find('code').get_text() + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + 
parse_topology_type=ChunkParseTopology.TREELEAF, + content=code_text, + type=ChunkType.CODE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'p': + para_text = element.get_text().strip() + if para_text: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=para_text, + type=ChunkType.TEXT, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'img': + img_src = element.get('src') + img_blob = await MdZipParser.get_image_blob(os.path.dirname(file_path), img_src) + if img_blob: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=img_blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) + subtree.append(node) + elif element.name == 'table': + table_array = await MdZipParser.extract_table_to_array(str(element)) + for row in table_array: + node = ParseNode( + id=uuid.uuid4(), + + lv=current_level, + parse_topology_type=ChunkParseTopology.TREELEAF, + content=row, + type=ChunkType.TABLE, + link_nodes=[] + ) + subtree.append(node) + + return subtree + + @staticmethod + async def flatten_tree(root: ParseNode, nodes: list[ParseNode]) -> None: + nodes.append(root) + for child in root.link_nodes: + await MdZipParser.flatten_tree(child, nodes) + + @staticmethod + async def markdown_to_tree(file_path: str, markdown_text: str) -> ParseNode: + html = markdown.markdown(markdown_text, extensions=['tables']) + root = ParseNode( + id=uuid.uuid4(), + title="", + lv=0, + parse_topology_type=ChunkParseTopology.TREEROOT, + content="", + type=ChunkType.TEXT, + link_nodes=[] + ) + root.link_nodes = await MdZipParser.build_subtree(file_path, html, 0) + nodes = [] + await MdZipParser.flatten_tree(root, nodes) + return nodes + + @staticmethod + async def parse(file_path: str) -> ParseResult: + target_file_path = os.path.join(os.path.dirname(file_path), 'temp') + await ZipHandler.unzip_file(file_path, target_file_path) + 
markdown_file = [f for f in os.listdir(target_file_path) if f.endswith('.md')] + if not markdown_file: + err = f"[MdZipParser] markdown文件不存在" + logging.error(err) + raise FileNotFoundError(err) + markdown_file_path = os.path.join(target_file_path, markdown_file[0]) if markdown_file else None + with open(markdown_file_path, 'r', encoding='utf-8', errors='ignore') as f: + markdown_text = f.read() + nodes = await MdZipParser.markdown_to_tree(target_file_path, markdown_text) + return ParseResult( + parse_topology_type=DocParseRelutTopology.TREE, + nodes=nodes + ) diff --git a/data_chain/parser/handler/pdf_parser.py b/data_chain/parser/handler/pdf_parser.py index f155265..0161079 100644 --- a/data_chain/parser/handler/pdf_parser.py +++ b/data_chain/parser/handler/pdf_parser.py @@ -1,274 +1,222 @@ +import asyncio import io import fitz +from fitz import Page +from fitz import Document import numpy as np from PIL import Image +from pandas import DataFrame +from pydantic import BaseModel, Field, validator, constr +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging -from data_chain.parser.handler.base_parser import BaseService -from data_chain.parser.tools.ocr import BaseOCR -class PdfService(BaseService): +class Bbox(BaseModel): + x0: float = Field(..., description="左上角x坐标") + x1: float = Field(..., description="右下角x坐标") + y0: float = Field(..., description="左上角y坐标") + y1: float = Field(..., description="右下角y坐标") - def __init__(self) -> None: - super().__init__() - self.image_model = None - self.total_pages = None - self.pdf_document = None - def open_pdf(self, file_path: str) -> None: - """打开PDF文件并初始化文档对象 +class ParseNodeWithBbox(BaseModel): + node: ParseNode = Field(..., description="文本块的内容") + bbox: Bbox = Field(..., description="文本块的边界框") 
class PdfParser(BaseParser):
    """Parse a PDF into a flat list of graph nodes (text, table rows, images).

    Every extractor returns ``ParseNodeWithBbox`` items so that nodes coming
    from different extractors can be interleaved by their position on the page.
    """
    name = 'pdf'

    @staticmethod
    async def extract_text_from_page(page: Page) -> list[ParseNodeWithBbox]:
        """Return the non-empty text blocks of *page*, ordered top-to-bottom, left-to-right.

        :param page: page to read text blocks from
        :return: one TEXT node per non-empty text block, sorted by (y0, x0)
        """
        nodes_with_bbox = []
        for block in page.get_text("blocks"):
            # Index 6 is the block type; 0 marks a text block.
            if block[6] != 0:
                continue
            text = block[4].strip()
            if not text:
                continue
            x0, y0, x1, y1 = block[:4]
            nodes_with_bbox.append(ParseNodeWithBbox(
                node=ParseNode(
                    id=uuid.uuid4(),
                    title=text,
                    lv=0,
                    parse_topology_type=ChunkParseTopology.GRAPHNODE,
                    content=text,
                    type=ChunkType.TEXT,
                    link_nodes=[],
                ),
                bbox=Bbox(x0=x0, y0=y0, x1=x1, y1=y1)
            ))
        return sorted(nodes_with_bbox, key=lambda x: (x.bbox.y0, x.bbox.x0))

    @staticmethod
    async def extract_table_to_array(table: DataFrame) -> list[list[str]]:
        """Convert a DataFrame into a list of rows of stringified cells.

        Cells whose string form is empty are skipped, so rows may have
        different lengths.

        :param table: table to convert
        :return: one list of cell strings per DataFrame row
        """
        table_array = []
        for _, row in table.iterrows():
            row_data = []
            for column in table.columns:
                cell_value = str(row[column])
                if cell_value:
                    row_data.append(cell_value)
            table_array.append(row_data)
        return table_array

    @staticmethod
    async def extract_table_from_page(page: Page) -> list[ParseNodeWithBbox]:
        """Return one TABLE node per table row found on *page*.

        Side effect: every table area is redacted from *page*, so text that is
        extracted from the same page afterwards no longer contains table cells.
        All rows of a table carry the bounding box of the whole table.

        :param page: page to search for tables (modified in place)
        :return: TABLE nodes in the order the tables were found
        """
        nodes_with_bbox = []
        for table in page.find_tables():
            table_bbox = fitz.Rect(table.bbox)
            page.add_redact_annot(table.bbox)
            table_df = table.to_pandas()
            table_array = await PdfParser.extract_table_to_array(table_df)
            for row in table_array:
                nodes_with_bbox.append(ParseNodeWithBbox(
                    node=ParseNode(
                        id=uuid.uuid4(),
                        lv=0,
                        parse_topology_type=ChunkParseTopology.GRAPHNODE,
                        content=row,
                        type=ChunkType.TABLE,
                        link_nodes=[],
                    ),
                    bbox=Bbox(
                        x0=table_bbox.x0,
                        y0=table_bbox.y0,
                        x1=table_bbox.x1,
                        y1=table_bbox.y1
                    )
                ))
        page.apply_redactions()
        return nodes_with_bbox

    @staticmethod
    async def extract_image_from_page(pdf_doc: Document, page: Page) -> list[ParseNodeWithBbox]:
        """Return one IMAGE node (raw image bytes as content) per image on *page*.

        An image that cannot be extracted or located is logged and skipped.

        :param pdf_doc: document owning *page*; used to extract image data by xref
        :param page: page whose images are collected
        :return: IMAGE nodes positioned at the first rectangle of each image
        """
        nodes_with_bbox = []
        for image_info in page.get_images(full=True):
            try:
                # First item of the image entry is its xref.
                xref = image_info[0]
                base_image = pdf_doc.extract_image(xref)
                position = page.get_image_rects(xref)[0]
                blob = base_image["image"]
                nodes_with_bbox.append(ParseNodeWithBbox(
                    node=ParseNode(
                        id=uuid.uuid4(),
                        lv=0,
                        parse_topology_type=ChunkParseTopology.GRAPHNODE,
                        content=blob,
                        type=ChunkType.IMAGE,
                        link_nodes=[],
                    ),
                    bbox=Bbox(
                        x0=position.x0,
                        y0=position.y0,
                        x1=position.x1,
                        y1=position.y1
                    )
                ))
            except Exception:
                err = "提取图片失败"
                logging.exception("[PdfParser] %s", err)
                continue
        return nodes_with_bbox

    @staticmethod
    async def image_related_text(
            image_node_with_bbox: ParseNodeWithBbox, text_nodes_with_bbox: list[ParseNodeWithBbox]):
        """Append every text node lying near the image to the image node's ``link_nodes``.

        The image box is grown by ``threshold`` on each side and the comparison
        adds ``threshold`` once more, so the effective margin is twice the threshold.

        :param image_node_with_bbox: image node whose ``link_nodes`` is extended in place
        :param text_nodes_with_bbox: candidate text nodes (same page coordinates)
        """
        threshold = 100
        image_x0 = image_node_with_bbox.bbox.x0 - threshold
        image_y0 = image_node_with_bbox.bbox.y0 - threshold
        image_x1 = image_node_with_bbox.bbox.x1 + threshold
        image_y1 = image_node_with_bbox.bbox.y1 + threshold
        for text_node_with_bbox in text_nodes_with_bbox:
            text_bbox = text_node_with_bbox.bbox
            # Text overlaps the enlarged image box horizontally.
            horizontally_adjacent = (
                text_bbox.x1 >= image_x0 - threshold and text_bbox.x0 <= image_x1 + threshold)
            # Text overlaps the enlarged image box vertically.
            vertically_adjacent = (
                text_bbox.y1 >= image_y0 - threshold and text_bbox.y0 <= image_y1 + threshold)
            if horizontally_adjacent and vertically_adjacent:
                image_node_with_bbox.node.link_nodes.append(text_node_with_bbox.node)

    @staticmethod
    async def merge_nodes_with_bbox(
            nodes_1: list[ParseNodeWithBbox],
            nodes_2: list[ParseNodeWithBbox]) -> list[ParseNodeWithBbox]:
        """Interleave *nodes_2* into *nodes_1* by position.

        Walking *nodes_1* in order, each pending node of *nodes_2* is emitted
        before the current node when it starts above it (smaller y0) and left of
        the right-most x1 seen so far. Remaining *nodes_2* items go to the end.
        If either list is empty the other list is returned as is (not copied).

        :param nodes_1: primary sequence; its relative order is preserved
        :param nodes_2: sequence to weave in; its relative order is preserved
        :return: merged sequence
        """
        if not nodes_1:
            return nodes_2
        if not nodes_2:
            return nodes_1

        max_x = 0
        index = 0
        merged = []
        for node in nodes_1:
            max_x = max(max_x, node.bbox.x1)
            while (index < len(nodes_2)
                   and nodes_2[index].bbox.x0 < max_x
                   and nodes_2[index].bbox.y0 < node.bbox.y0):
                merged.append(nodes_2[index])
                index += 1
            merged.append(node)
        merged.extend(nodes_2[index:])
        return merged

    @staticmethod
    async def parser(file_path: str) -> ParseResult:
        """Parse the PDF at *file_path* into a GRAPH-topology ``ParseResult``.

        :param file_path: path of the PDF file
        :return: all nodes of all pages in page order
        :raises Exception: re-raises whatever ``fitz.open`` raised for an unreadable file
        """
        try:
            pdf_doc = fitz.open(file_path)
        except Exception as e:
            err = "无法打开pdf文件"
            logging.exception("[PdfParser] %s", err)
            raise e
        nodes_with_bbox = []
        try:
            for page_num in range(len(pdf_doc)):
                page = pdf_doc.load_page(page_num)
                # Tables first: extracting them redacts their area, which keeps
                # table cells from being emitted a second time as plain text.
                table_nodes_with_bbox = await PdfParser.extract_table_from_page(page)
                text_nodes_with_bbox = await PdfParser.extract_text_from_page(page)
                image_nodes_with_bbox = await PdfParser.extract_image_from_page(pdf_doc, page)
                # Link each image to the text around it. Bounding boxes are
                # page-local, so this has to happen page by page. This replaces
                # a call to DocxParser.image_related_node_in_link_nodes, a name
                # that is neither defined nor imported in this module.
                for image_node_with_bbox in image_nodes_with_bbox:
                    await PdfParser.image_related_text(image_node_with_bbox, text_nodes_with_bbox)
                sub_nodes_with_bbox = await PdfParser.merge_nodes_with_bbox(
                    text_nodes_with_bbox, table_nodes_with_bbox)
                sub_nodes_with_bbox = await PdfParser.merge_nodes_with_bbox(
                    sub_nodes_with_bbox, image_nodes_with_bbox)
                nodes_with_bbox.extend(sub_nodes_with_bbox)
        finally:
            # Release the file handle even when a page fails to parse.
            pdf_doc.close()
        nodes = [node_with_bbox.node for node_with_bbox in nodes_with_bbox]
        return ParseResult(
            parse_topology_type=DocParseRelutTopology.GRAPH,
            nodes=nodes
        )


if __name__ == "__main__":
    # Manual smoke run; guarded so that importing the module parses nothing.
    tmp = asyncio.run(PdfParser.parser("KOBA样册2022.pdf"))
    print(tmp)
numpy as np -from data_chain.parser.handler.base_parser import BaseService -from data_chain.parser.tools.ocr import BaseOCR +from pptx import Presentation +from pptx.table import Table +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging -class PptxService(BaseService): - def __init__(self): - super().__init__() +class PptxParser(BaseParser): + name = 'pptx' + + @staticmethod + async def extract_table_to_array(table: Table) -> list[list[str]]: + table_data = [] + for row in table.rows: + row_data = [] + for cell in row.cells: + cell_text = ''.join([p.text for p in cell.paragraphs]) + row_data.append(cell_text) + table_data.append(row_data) + return table_data - async def extract_ppt_content(self, pptx): - lines = [] + @staticmethod + async def ppt_to_parse_nodes(pptx) -> list[ParseNode]: + nodes = [] for slide_num, slide in enumerate(pptx.slides, start=1): for shape in slide.shapes: @@ -26,58 +36,68 @@ class PptxService(BaseService): for run in paragraph.runs: text += run.text except Exception as e: - logging.error(f"Get text from slide failed due to: {e}") + err = "文字提取失败" + logging.exception("[PptxParser] %s", err) if text.strip(): - lines.append({ - "text": text, - "type": 'para' - }) + nodes.append( + ParseNode( + id=uuid.uuid4(), + + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=text, + type=ChunkType.TEXT, + link_nodes=[] + ) + ) # 提取表格 elif shape.has_table: table = shape.table - rows = self.split_table(table) - for row in rows: - lines.append({ - "text": text, - "type": "table" - }) + table_array = await PptxParser.extract_table_to_array(table) + for row in table_array: + node = ParseNode( + id=uuid.uuid4(), + + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=row, + type=ChunkType.TABLE, 
+ link_nodes=[] + ) + nodes.append(node) # 提取图片 elif shape.shape_type == 13: # 13 表示图片类型 try: image = shape.image - image_ext = os.path.splitext(image.filename)[1] + blob = image.blob except Exception as e: - logging.error(f"Extracting image from slide failed due to: {e}") + err = "图片提取失败" + logging.exception("[PptxParser] %s", err) continue - lines.append({ - "image": Image.open(BytesIO(image.blob)), - "type": "image", - "extension": image_ext - }) - - return lines + nodes.append( + ParseNode( + id=uuid.uuid4(), - async def parser(self, file_path): - """ - 解析文件并提取其中的文本和图像信息。 + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=blob, + type=ChunkType.IMAGE, + link_nodes=[] + ) + ) - 参数: - - file_path (str): 文件的路径。 + return nodes - 返回: - - tuple: 包含分块的文本信息、分块间的链接信息和提取的图像信息的元组。 - 如果文件无法打开或解析失败,则返回 None。 - """ + @staticmethod + async def parser(file_path): try: pptx = Presentation(file_path) except Exception as e: - logging.error(f"Pptx open failed due to: {e}") - raise e - if self.parser_method != "general": - self.ocr_tool = BaseOCR(llm=self.llm, method=self.parser_method) - lines = await self.extract_ppt_content(pptx) - lines, images = await self.change_lines(lines) - lines = await self.ocr_from_images_in_lines(lines) - chunks = self.build_chunks_by_lines(lines) - chunk_links = self.build_chunk_links_by_line(chunks) - return chunks, chunk_links, images + err = "PPTX文件解析失败" + logging.exception("[PptxParser] %s", err) + DocxParser.image_related_node_in_link_nodes(nodes) + parse_result = ParseResult( + parse_topology_type=DocParseRelutTopology.LIST, + nodes=nodes + ) + return parse_result diff --git a/data_chain/parser/handler/txt_parser.py b/data_chain/parser/handler/txt_parser.py index 5ba6206..5d7e84d 100644 --- a/data_chain/parser/handler/txt_parser.py +++ b/data_chain/parser/handler/txt_parser.py @@ -1,51 +1,44 @@ import uuid import chardet +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from 
data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging -from data_chain.parser.handler.base_parser import BaseService -Empty_id = uuid.UUID(int=0) - -class TxtService(BaseService): - - # 提取段落分词结果 - def extract_paragraph(self, paragraph): - sentences = self.split_sentences(paragraph, self.chunk_tokens) - results = [] - for sentence in sentences: - results.append({ - "type": "para", - "text": sentence, - }) - return results +class TxtParser(BaseParser): + name = 'txt' @staticmethod # 获取编码方式 - def detect_encoding(file_path): + async def detect_encoding(file_path: str) -> str: with open(file_path, 'rb') as file: raw_data = file.read() result = chardet.detect(raw_data) encoding = result['encoding'] return encoding - # 获取段落 - def read_text_file_by_paragraph(self, file_path): + @staticmethod + async def parser(file_path: str) -> ParseResult: + enconding = await TxtParser.detect_encoding(file_path) try: - encoding = self.detect_encoding(file_path) - with open(file_path, 'r', encoding=encoding,errors='ignore') as file: # 打开文件 + with open(file_path, 'r', encoding=enconding, errors='ignore') as file: content = file.read() - paragraphs = content.split('\n') - return paragraphs except Exception as e: - logging.error(f"Error opening file {file_path} :{e}") - - async def parser(self, file_path): - # 使用函数 - paragraphs = self.read_text_file_by_paragraph(file_path) - sentences = [] - for paragraph in paragraphs: - sentences.extend(self.extract_paragraph(paragraph)) - chunks = self.build_chunks_by_lines(sentences) - chunk_links = self.build_chunk_links_by_line(chunks) - # 打印每个段落 - return chunks, chunk_links, [] + err = "读取txt文件失败" + logging.exception("[TxtParser] %s", err) + raise e + node = ParseNode( + id=uuid.uuid4(), + title="", + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=content, + type=ChunkType.TEXT, + link_nodes=[] + ) + 
parse_result = ParseResult( + parse_topology_type=DocParseRelutTopology.LIST, + nodes=[node] + ) + return parse_result diff --git a/data_chain/parser/handler/xlsx_parser.py b/data_chain/parser/handler/xlsx_parser.py index 0cf72a2..eb79ec1 100644 --- a/data_chain/parser/handler/xlsx_parser.py +++ b/data_chain/parser/handler/xlsx_parser.py @@ -1,11 +1,18 @@ import pandas as pd -from data_chain.logger.logger import logger as logging -from data_chain.parser.handler.base_parser import BaseService +from pandas import DataFrame +from pydantic import BaseModel, Field, validator, constr +import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging -class XlsxService(BaseService): +class XlsxParser(BaseParser): + name = 'xlsx' # 打开Excel文件 + @staticmethod def read_xlsx(file_path): try: @@ -15,20 +22,37 @@ class XlsxService(BaseService): logging.error(f"Error opening file {file_path} :{e}") raise e - # 提取列表分词结果 - def extract_table(self, data): - lines = self.split_table(data) - results = [] - for line in lines: - results.append({ - 'type': 'table', - 'text': line, - }) - return results + @staticmethod + async def extract_table_to_array(table: DataFrame) -> list[list[str]]: + table_array = [] + for index, row in table.iterrows(): + row_data = [str(cell) for cell in row] + table_array.append(row_data) + return table_array - async def parser(self, file_path): - data = self.read_xlsx(file_path) - sentences = self.extract_table(data) - chunks = self.build_chunks_by_lines(sentences) - chunk_links = self.build_chunk_links_by_line(chunks) - return chunks, chunk_links, [] + @staticmethod + async def parser(file_path: str) -> ParseResult: + try: + data = pd.read_excel(file_path, sheet_name=None, header=None) + except Exception as e: + err = "读取xlsx文件失败" + 
logging.exception("[XlsxParser] %s", err) + raise e + nodes = [] + for sheet_name, df in data.items(): + table_array = await XlsxParser.extract_table_to_array(df) + for row in table_array: + node = ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + content=row, + type=ChunkType.TABLE, + link_nodes=[] + ) + nodes.append(node) + parse_result = ParseResult( + parse_topology_type=DocParseRelutTopology.LIST, + nodes=nodes + ) + return parse_result diff --git a/data_chain/parser/handler/yaml_parser.py b/data_chain/parser/handler/yaml_parser.py new file mode 100644 index 0000000..e422af4 --- /dev/null +++ b/data_chain/parser/handler/yaml_parser.py @@ -0,0 +1,33 @@ +import yaml +import uuid + +from data_chain.parser.parse_result import ParseNode, ParseResult +from data_chain.parser.handler.base_parser import BaseParser +from data_chain.logger.logger import logger as logging + + +class YamlParser(BaseParser): + name = 'yaml' + + @staticmethod + async def parser(file_path: str) -> ParseResult: + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: + content = yaml.load(file, Loader=yaml.SafeLoader) + except Exception as e: + err = "读取yaml文件失败" + logging.exception("[YamlParser] %s", err) + raise e + node = ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=None, + content=content, + type=None, + link_nodes=[] + ) + parse_result = ParseResult( + parse_topology_type=None, + nodes=[node] + ) + return parse_result diff --git a/data_chain/parser/parse_result.py b/data_chain/parser/parse_result.py new file mode 100644 index 0000000..b701c0f --- /dev/null +++ b/data_chain/parser/parse_result.py @@ -0,0 +1,26 @@ +from typing import Any, Optional +import uuid +from pydantic import BaseModel, Field, validator, constr +from data_chain.entities.enum import DocParseRelutTopology, ChunkParseTopology, ChunkType +from data_chain.entities.common import DEFAULt_DOC_TYPE_ID + + +class ParseNode(BaseModel): + """节点""" + id: 
uuid.UUID = Field(..., description="节点ID") + pre_id: Optional[uuid.UUID] = Field(None, description="父节点ID") + title: Optional[str] = Field(None, description="节点标题") + lv: int = Field(..., description="节点层级") + parse_topology_type: ChunkParseTopology = Field(..., description="解析拓扑类型") + text_feature: str = Field(default='', description="节点特征") + vector: Optional[list[float]] = Field(default=None, description="节点向量") + text: str = Field(default='', description="节点文本") + content: Any = Field(..., description="节点内容") + type: ChunkType = Field(..., description="节点类型") + link_nodes: list = Field(..., description="链接节点") + + +class ParseResult(BaseModel): + """解析结果""" + parse_topology_type: DocParseRelutTopology = Field(..., description="解析拓扑类型") + nodes: list[ParseNode] = Field(..., description="节点列表") diff --git a/data_chain/parser/service/parser_service.py b/data_chain/parser/service/parser_service.py deleted file mode 100644 index 47a4d20..0000000 --- a/data_chain/parser/service/parser_service.py +++ /dev/null @@ -1,247 +0,0 @@ -import shutil -import os -from typing import List, Dict -import traceback -from data_chain.apps.service.embedding_service import Vectorize -from data_chain.manager.knowledge_manager import KnowledgeBaseManager -from data_chain.manager.model_manager import ModelManager -from data_chain.models.constant import OssConstant, embedding_model_out_dimensions, ParseMethodEnum -from data_chain.parser.handler.docx_parser import DocxService -from data_chain.parser.handler.html_parser import HtmlService -from data_chain.parser.handler.xlsx_parser import XlsxService -from data_chain.parser.handler.txt_parser import TxtService -from data_chain.parser.handler.pdf_parser import PdfService -from data_chain.parser.handler.md_parser import MdService -from data_chain.parser.handler.doc_parser import DocService -from data_chain.parser.handler.pptx_parser import PptxService -from data_chain.stores.postgres.postgres import ChunkEntity, TemporaryChunkEntity, 
ChunkLinkEntity, PostgresDB, ImageEntity, TemporaryVectorItemstEntity -from data_chain.manager.document_manager import DocumentManager, TemporaryDocumentManager -from data_chain.manager.chunk_manager import ChunkManager, ChunkLinkManager, TemporaryChunkManager -from data_chain.manager.image_manager import ImageManager -from data_chain.stores.minio.minio import MinIO -from data_chain.manager.vector_items_manager import VectorItemsManager, TemporaryVectorItemsManager -from data_chain.logger.logger import logger as logging - - -class ParserService: - # TODO:把user_id和doc_id提取到这层 - def __init__(self): - self.doc = None - - async def parser(self, doc_id, file_path, is_temporary_document=False): - model_map = { - ".docx": DocxService, - ".doc": DocService, - ".txt": TxtService, - ".pdf": PdfService, - ".xlsx": XlsxService, - ".md": MdService, - ".html": HtmlService, - ".pptx": PptxService, - } - if not is_temporary_document: - self.doc = await DocumentManager.select_by_id(doc_id) - else: - self.doc = await TemporaryDocumentManager.select_by_id(doc_id) - file_extension = self.doc.extension - try: - if file_extension in model_map: - model = model_map[file_extension]() # 判断文件类型 - if not is_temporary_document: - llm_entity = await ModelManager.select_by_user_id(self.doc.user_id) - await model.init_service(llm_entity=llm_entity, - chunk_tokens=self.doc.chunk_size, - parser_method=self.doc.parser_method) - else: - await model.init_service(llm_entity=None, - chunk_tokens=self.doc.chunk_size, - parser_method=self.doc.parser_method) - chunk_list, chunk_link_list, image_chunks = await model.parser(file_path) - if not is_temporary_document: - for chunk in chunk_list: - chunk['doc_id'] = doc_id - chunk['user_id'] = self.doc.user_id - chunk['kb_id'] = self.doc.kb_id - for image_chunk in image_chunks: - image_chunk['doc_id'] = doc_id - image_chunk['user_id'] = self.doc.user_id - else: - for chunk in chunk_list: - chunk['doc_id'] = doc_id - for image_chunk in image_chunks: - 
image_chunk['doc_id'] = doc_id - else: - logging.error(f"No service available for file type: {file_extension}") - return {"chunk_list": [], "chunk_link_list": [], "image_chunks": []} - except Exception as e: - logging.error(f'fail with exception:{e}') - logging.error(f'fail with exception:{traceback.format_exc()}') - raise e - return {"chunk_list": chunk_list, "chunk_link_list": chunk_link_list, "image_chunks": image_chunks} - - @staticmethod - async def upload_full_text_to_database(document_id, full_text, is_temporary_document=False): - try: - update_dict = {'full_text': full_text} - if not is_temporary_document: - await DocumentManager.update(document_id, update_dict) - else: - await TemporaryDocumentManager.update(document_id, update_dict) - except Exception as e: - logging.error(f'Update full text to pg failed due to:{e}') - raise e - - @staticmethod - async def upload_chunks_to_database(chunks, is_temporary_document=False): - if len(chunks) == 0: - return - try: - if not is_temporary_document: - image_entity_list = await ImageManager.query_image_by_doc_id(chunks[0]['doc_id']) - for image_entity in image_entity_list: - MinIO.delete_object(OssConstant.MINIO_BUCKET_PICTURE, str(image_entity.id)) - await ChunkManager.delete_by_document_ids([chunks[0]['doc_id']]) - else: - await TemporaryChunkManager.delete_by_temporary_document_ids([chunks[0]['doc_id']]) - except Exception as e: - logging.error(f"Failed to delete chunk: {e}") - raise e - try: - if not is_temporary_document: - chunk_entity_list = [] - for chunk in chunks: - chunk_entity = ChunkEntity( - id=chunk['id'], - kb_id=chunk['kb_id'], - user_id=chunk['user_id'], - document_id=chunk['doc_id'], - text=chunk['text'], - tokens=chunk['tokens'], - type=chunk['type'], - global_offset=chunk['global_offset'], - local_offset=chunk['local_offset'], - enabled=chunk['enabled'], - status=chunk['status'] - ) - chunk_entity_list.append(chunk_entity) - await ChunkManager.insert_chunks(chunk_entity_list) - else: - 
chunk_entity_list = [] - for chunk in chunks: - chunk_entity = TemporaryChunkEntity( - id=chunk['id'], - document_id=chunk['doc_id'], - text=chunk['text'], - tokens=chunk['tokens'], - type=chunk['type'], - global_offset=chunk['global_offset'], - ) - chunk_entity_list.append(chunk_entity) - await TemporaryChunkManager.insert_temprorary_chunks(chunk_entity_list) - except Exception as e: - logging.error(f"Failed to upload chunk: {e}") - raise e - - @staticmethod - async def upload_chunk_links_to_database(chunk_links: List[Dict], is_temporary_document: bool = False): - try: - chunk_link_entity_list = [] - if not is_temporary_document: - for chunk_link in chunk_links: - chunk_link_entity = ChunkLinkEntity( - id=chunk_link['id'], - chunk_a_id=chunk_link['chunk_a'], - chunk_b_id=chunk_link['chunk_b'], - type=chunk_link['type'], - ) - chunk_link_entity_list.append(chunk_link_entity) - await ChunkLinkManager.insert_chunk_links(chunk_link_entity_list) - except Exception as e: - logging.error(f"Failed to upload chunk: {e}") - raise e - - @staticmethod - async def upload_images_to_minio(images, is_temporary_document: bool = False): - output_dir = None - try: - if not is_temporary_document: - for image in images: - output_dir = os.path.join(OssConstant.PARSER_SAVE_FOLDER, str(image['id'])) - output_path = os.path.join(output_dir, str(image['id'])+'.'+image['extension']) - await MinIO.put_object(OssConstant.MINIO_BUCKET_PICTURE, str(image['id']), output_path) - except Exception as e: - logging.error(f"Failed to upload image: {e}") - finally: - for image in images: - output_dir = os.path.join(OssConstant.PARSER_SAVE_FOLDER, str(image['id'])) - if output_dir and os.path.exists(output_dir): - shutil.rmtree(output_dir) - - @staticmethod - async def upload_images_to_database(images, is_temporary_document: bool = False): - try: - image_entity_list = [] - if not is_temporary_document: - for image in images: - image_entity = ImageEntity(id=image['id'], - chunk_id=image['chunk_id'], - 
document_id=image['doc_id'], - user_id=image['user_id'], - ) - image_entity_list.append(image_entity) - await ImageManager.add_images(image_entity_list) - except Exception as e: - logging.error(f"Failed to upload image: {e}") - raise e - - @staticmethod - async def embedding_chunks(chunks, is_temporary_document: bool = False): - try: - vectors = [] - if not is_temporary_document: - for chunk in chunks: - vectors.append({'chunk_id': chunk['id'], - 'doc_id': chunk['doc_id'], - 'kb_id': chunk['kb_id'], - 'user_id': chunk['user_id'], - 'vector': await Vectorize.vectorize_embedding(chunk['text']), - 'enabled': chunk['enabled'], - }) - else: - for chunk in chunks: - vectors.append({'chunk_id': chunk['id'], - 'doc_id': chunk['doc_id'], - 'vector': await Vectorize.vectorize_embedding(chunk['text']), - }) - return vectors - except Exception as e: - logging.error(f"Failed to embedding chunk: {e}") - raise e - - @staticmethod - async def upload_vectors_to_database(vectors, is_temporary_document=False): - if len(vectors) == 0: - return - try: - if not is_temporary_document: - doc = await DocumentManager.select_by_id(vectors[0]['doc_id']) - kb = await KnowledgeBaseManager.select_by_id(doc.kb_id) - vector_items_table = await PostgresDB.get_dynamic_vector_items_table( - str(kb.vector_items_id), - embedding_model_out_dimensions[kb.embedding_model] - ) - await PostgresDB.create_table(vector_items_table) - await VectorItemsManager.add_all(vector_items_table, vectors) - else: - vector_entity_list = [] - for vector in vectors: - vector_entity_list.append( - TemporaryVectorItemstEntity( - document_id=vector['doc_id'], - chunk_id=vector['chunk_id'], - vector=vector['vector']) - ) - await TemporaryVectorItemsManager.add_all(vector_entity_list) - except Exception as e: - logging.error(f"Failed to upload chunk: {e}") - raise e diff --git a/data_chain/parser/tools/image_tool.py b/data_chain/parser/tools/image_tool.py new file mode 100644 index 0000000..4c56682 --- /dev/null +++ 
class ImageTool:
    """Helpers for inspecting raw image bytes."""

    # Magic-number prefixes, listed in the order the type checks are applied.
    _SIGNATURES = (
        (b"\xff\xd8\xff", "jpg"),  # hex FFD8FF
        (b"\x89PNG", "png"),       # hex 89504E47
        (b"GIF8", "gif"),          # hex 47494638
        (b"BM", "bmp"),            # hex 424D
    )

    @staticmethod
    def get_image_type(b):
        """Return the file extension implied by the leading bytes of an image.

        Only the header is inspected. Searching the hex dump of the whole
        payload (the previous behaviour) misclassifies images whose pixel data
        happens to contain another format's magic bytes, and can even match
        across byte boundaries.

        :param b: raw image content (any bytes-like object)
        :return: "jpg", "png", "gif" or "bmp"; "jpeg" when no signature matches
        """
        header = bytes(b[:8])
        for signature, extension in ImageTool._SIGNATURES:
            if header.startswith(signature):
                return extension
        return "jpeg"
logging.error(f'Get text from ocr result failed due to: {e}') - return '' - - async def cut_ocr_result_in_part(self, ocr_result, max_tokens=1024): - """ - ocr结果切割接口 - 参数: - ocr_result:ocr识别结果,包含了文字坐标、内容、置信度 - max_tokens:最大token数 - """ - tokens = 0 - ocr_result_part = [] - ocr_result_parts = [] - for _ in ocr_result[0]: - if _ is not None and len(_) > 0: - sub_tokens = split_tools.get_tokens(str(_)) - if tokens + sub_tokens > max_tokens: - ocr_result_parts.append(ocr_result_part) - ocr_result_part = [_] - tokens += sub_tokens - else: - ocr_result_part.append(_) - tokens += sub_tokens - if len(ocr_result_part) > 0: - ocr_result_parts.append(ocr_result_part) - return ocr_result_parts - - async def enhance_ocr_result(self, ocr_result, image_related_text): - """ - ocr结果强化接口 - 参数: - ocr_result:ocr识别结果,包含了文字坐标、内容、置信度 - image_related_text:图片组对应的前后文 - """ - try: - try: - with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - prompt_template = prompt_dict.get('OCR_ENHANCED_PROMPT', '') - except Exception as e: - logging.error(f'Get prompt template failed due to :{e}') - return '' - pre_part_description = "" - ocr_result_parts = await self.cut_ocr_result_in_part(ocr_result, self.max_tokens // 5*2) - user_call = '请详细输出图片的摘要,不要输出其他内容' - for part in ocr_result_parts: - pre_part_description_cp = pre_part_description - try: - prompt = prompt_template.format( - image_related_text=image_related_text, - pre_part_description=pre_part_description, - part=part) - pre_part_description = await self.llm.nostream([], prompt, user_call) - except Exception as e: - logging.error(f"OCR result part enhance failed due to: {e}") - pre_part_description = pre_part_description_cp - return pre_part_description - except Exception as e: - logging.error(f'OCR result enhance failed due to: {e}') - return "" - - async def get_text_from_image(self, ocr_result, image_related_text): - """ - 从image中提取文字的接口 - 输入: - ocr_result: ocr结果 - 
class OcrTool:
    """OCR helpers built around one PaddleOCR model shared by all calls."""

    # Locations of the detection / recognition / angle-classification models.
    det_model_dir = 'data_chain/parser/model/ocr/ch_PP-OCRv4_det_infer'
    rec_model_dir = 'data_chain/parser/model/ocr/ch_PP-OCRv4_rec_infer'
    cls_model_dir = 'data_chain/parser/model/ocr/ch_ppocr_mobile_v2.0_cls_infer'
    # Built once when the class is created and reused by every static method.
    model = PaddleOCR(
        det_model_dir=det_model_dir,
        rec_model_dir=rec_model_dir,
        cls_model_dir=cls_model_dir,
        use_angle_cls=True,  # enable the text-angle classifier
        use_space_char=True  # keep space characters in the recognized text
    )

    @staticmethod
    async def ocr_from_image(image: np.ndarray) -> list:
        """Run OCR on an image.

        :param image: image to recognize
        :return: the raw PaddleOCR result, or None when nothing was recognized
            or recognition failed
        """
        try:
            # The model lives on the class, and ``ocr`` is called synchronously
            # here (as the previous BaseOCR implementation did), so it must not
            # be awaited.
            ocr_result = OcrTool.model.ocr(image, cls=True)
            if ocr_result is None or ocr_result[0] is None:
                return None
            return ocr_result
        except Exception as e:
            err = f"[OCRTool] OCR识别失败 {e}"
            logging.exception(err)
            return None

    @staticmethod
    async def merge_text_from_ocr_result(ocr_result: list) -> str:
        """Concatenate the recognized text of every entry in ``ocr_result[0]``.

        :param ocr_result: OCR result; each entry of ``ocr_result[0]`` is read
            as ``entry[1][0]`` for its text
        :return: the merged text, or '' when the result cannot be read
        """
        try:
            return ''.join(str(item[1][0]) for item in ocr_result[0])
        except Exception as e:
            err = f"[OCRTool] OCR结果合并失败 {e}"
            logging.exception(err)
            return ''

    @staticmethod
    async def enhance_ocr_result(ocr_result, image_related_text='', llm: LLM = None) -> str:
        """Summarize an OCR result with an LLM, one token-bounded part at a time.

        Each part is sent together with the description produced so far, so the
        description is refined incrementally. If the enhancement cannot run at
        all, or produces nothing, the plain merged OCR text is returned.

        :param ocr_result: OCR result to describe
        :param image_related_text: text surrounding the image in the document
        :param llm: model used for the enhancement
        :return: the LLM description, or the merged OCR text as a fallback
        """
        try:
            with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f:
                prompt_dict = yaml.load(f, Loader=yaml.SafeLoader)
                prompt_template = prompt_dict.get('OCR_ENHANCED_PROMPT', '')
            pre_part_description = ""
            # Half of the budget for the context text, half for each OCR part.
            token_limit = llm.max_tokens//2
            image_related_text = TokenTool.get_k_tokens_words_from_content(image_related_text, token_limit)
            ocr_result_parts = TokenTool.split_str_with_slide_window(str(ocr_result), token_limit)
            user_call = '请详细输出图片的摘要,不要输出其他内容'
            for part in ocr_result_parts:
                # Keep the last good description so one failed part is skipped
                # instead of discarding the progress made so far.
                pre_part_description_cp = pre_part_description
                try:
                    prompt = prompt_template.format(
                        image_related_text=image_related_text,
                        pre_part_description=pre_part_description,
                        part=part)
                    pre_part_description = await llm.nostream([], prompt, user_call)
                except Exception as e:
                    err = f"[OCRTool] OCR增强失败 {e}"
                    logging.exception(err)
                    pre_part_description = pre_part_description_cp
            if pre_part_description:
                return pre_part_description
            # Every part failed: fall back to the raw recognized text.
            return await OcrTool.merge_text_from_ocr_result(ocr_result)
        except Exception as e:
            err = f"[OCRTool] OCR增强失败 {e}"
            logging.exception(err)
            # Must be awaited, otherwise callers receive a coroutine object.
            return await OcrTool.merge_text_from_ocr_result(ocr_result)

    @staticmethod
    async def image_to_text(image: np.ndarray, image_related_text: str = '', llm: LLM = None) -> str:
        """Convert an image to text, using LLM enhancement when ``llm`` is given.

        :param image: image to recognize
        :param image_related_text: text surrounding the image in the document
        :param llm: optional model; when None the plain OCR text is returned
        :return: the extracted text, or '' when OCR finds nothing or fails
        """
        try:
            ocr_result = await OcrTool.ocr_from_image(image)
            if ocr_result is None:
                return ''
            if llm is None:
                text = await OcrTool.merge_text_from_ocr_result(ocr_result)
            else:
                text = await OcrTool.enhance_ocr_result(ocr_result, image_related_text, llm)
            return text
        except Exception as e:
            err = f"[OCRTool] 图片转文本失败 {e}"
            logging.exception(err)
            return ''
class Grade(BaseModel):
    """One anchor point of a piecewise-linear "input size -> budget" table."""
    content_len: int = Field(..., description="内容长度")
    tokens: int = Field(..., description="token数")


class TokenTool:
    """Token counting, text splitting/compression and retrieval-quality metrics."""

    stop_words_path = config['STOP_WORDS_PATH']
    # Stop words are loaded once, when the class is created.
    with open(stop_words_path, 'r', encoding='utf-8') as f:
        stopwords = set(line.strip() for line in f)

    # ------------------------------------------------------------------ helpers
    @staticmethod
    def _interpolate_grade(value: int, grades: list) -> int:
        """Linearly interpolate the budget for ``value`` between grade anchors.

        Values at or beyond the last anchor saturate at the last anchor's budget.
        """
        if value >= grades[-1].content_len:
            return grades[-1].tokens
        for low, high in zip(grades, grades[1:]):
            if low.content_len <= value < high.content_len:
                return int(low.tokens + (high.tokens - low.tokens) * (
                    value - low.content_len) / (high.content_len - low.content_len))
        return 0

    @staticmethod
    def _load_prompts() -> dict:
        """Read the prompt templates from the configured YAML file."""
        with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f:
            return yaml.load(f, Loader=yaml.SafeLoader)

    @staticmethod
    def _content_words(content: str) -> list:
        """Segment ``content`` into words and drop the stop words."""
        return [
            word for word in TokenTool.split_words(content)
            if word not in TokenTool.stopwords
        ]

    # ------------------------------------------------------------------ budgets
    @staticmethod
    def get_leave_tokens_from_content_len(content: str) -> int:
        """Return how many tokens to keep for ``content``.

        The token count of the content is mapped through a piecewise-linear
        table. The saturation check compares against the last anchor's
        ``content_len``; comparing against its ``tokens`` (as before) made the
        result jump from about 56 straight to 1024 at 1024 input tokens.
        """
        grades = [
            Grade(content_len=0, tokens=0),
            Grade(content_len=10, tokens=8),
            Grade(content_len=50, tokens=16),
            Grade(content_len=250, tokens=32),
            Grade(content_len=1250, tokens=64),
            Grade(content_len=6250, tokens=128),
            Grade(content_len=31250, tokens=256),
            Grade(content_len=156250, tokens=512),
            Grade(content_len=781250, tokens=1024),
        ]
        return TokenTool._interpolate_grade(TokenTool.get_tokens(content), grades)

    @staticmethod
    def get_leave_setences_from_content_len(content: str) -> int:
        """Return how many sentences to keep for ``content``.

        The sentence count is mapped through a piecewise-linear table and
        saturates at the last anchor's budget.
        """
        grades = [
            Grade(content_len=0, tokens=0),
            Grade(content_len=10, tokens=4),
            Grade(content_len=50, tokens=8),
            Grade(content_len=250, tokens=16),
            Grade(content_len=1250, tokens=32),
            Grade(content_len=6250, tokens=64),
            Grade(content_len=31250, tokens=128),
            Grade(content_len=156250, tokens=256),
            Grade(content_len=781250, tokens=512),
        ]
        sentences = TokenTool.content_to_sentences(content)
        return TokenTool._interpolate_grade(len(sentences), grades)

    # ------------------------------------------------------- tokens and splits
    @staticmethod
    def get_tokens(content: str) -> int:
        """Return the number of tokens in ``content`` (0 when encoding fails)."""
        try:
            enc = tiktoken.encoding_for_model("gpt-4")
            return len(enc.encode(str(content)))
        except Exception as e:
            err = f"[TokenTool] 获取token失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return 0

    @staticmethod
    def get_k_tokens_words_from_content(content: str, k: int = 16) -> str:
        """Return the longest prefix of ``content`` that is at most ``k`` tokens.

        :return: the prefix (the whole content when it already fits), or ''
            when an error occurs
        """
        try:
            if (TokenTool.get_tokens(content) <= k):
                return content
            # Binary search on the prefix length; content[:l] always fits.
            l = 0
            r = len(content)
            while l+1 < r:
                mid = (l+r)//2
                if (TokenTool.get_tokens(content[:mid]) <= k):
                    l = mid
                else:
                    r = mid
            return content[:l]
        except Exception as e:
            err = f"[TokenTool] 获取k个token的词失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return ""

    @staticmethod
    def split_str_with_slide_window(content: str, slide_window_size: int) -> list:
        """Cut ``content`` into consecutive pieces of at most
        ``slide_window_size`` tokens each.

        :return: the pieces in order, or [] when an error occurs
        """
        result = []
        try:
            while len(content) > 0:
                sub_content = TokenTool.get_k_tokens_words_from_content(content, slide_window_size)
                if not sub_content:
                    # The window is too small for even one character; take one
                    # anyway so the loop always makes progress and terminates.
                    sub_content = content[:1]
                result.append(sub_content)
                content = content[len(sub_content):]
            return result
        except Exception as e:
            err = f"[TokenTool] 滑动窗口切割失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return []

    @staticmethod
    def split_words(content: str) -> list:
        """Segment ``content`` into words with jieba ([] on failure)."""
        try:
            return list(jieba.cut(str(content)))
        except Exception as e:
            err = f"[TokenTool] 分词失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return []

    @staticmethod
    def get_top_k_keywords(content: str, k=10) -> list:
        """Return up to ``k`` keywords extracted by jieba ([] on failure)."""
        try:
            keywords = extract_tags(content, topK=k, withWeight=True)
            return [keyword for keyword, weight in keywords]
        except Exception as e:
            err = f"[TokenTool] 获取关键词失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return []

    @staticmethod
    def compress_tokens(content: str, k: int = None) -> str:
        """Drop stop words from ``content`` and optionally truncate to ``k`` tokens.

        :return: the compressed text; the original content when an error occurs
        """
        try:
            filtered_content = ''.join(TokenTool._content_words(content))
            if k is not None:
                filtered_content = TokenTool.get_k_tokens_words_from_content(filtered_content, k)
            return filtered_content
        except Exception as e:
            err = f"[TokenTool] 压缩token失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return content

    @staticmethod
    def content_to_sentences(content: str) -> list:
        """Split a paragraph into sentences at terminal punctuation.

        Listed abbreviations and similar tokens are swapped for placeholders
        before splitting, so their dots do not end a sentence, and are restored
        afterwards.
        """
        # Common abbreviations and structures that must not be cut.
        protected_phrases = [
            'e.g.', 'i.e.', 'U.S.', 'U.K.', 'A.M.', 'P.M.', 'a.m.', 'p.m.', 'FY25Q2',
            'KPI', 'CI/CD', 'A/B test', 'PRD', 'PDF', 'API', 'OMG', 'TBD', 'EOM',
            'Inc.', 'Ltd.', 'No.', 'vs.', 'approx.', 'Dr.', 'Mr.', 'Ms.', 'Prof.',
        ]

        # Replace each protected phrase with a unique placeholder.
        placeholder_map = {}
        for phrase in protected_phrases:
            placeholder = f"__PROTECTED_{uuid.uuid4().hex}__"
            placeholder_map[placeholder] = phrase
            content = content.replace(phrase, placeholder)

        pattern = re.compile(
            r'(?<=[。!?!?;;][”’"\')】】》〕〉)\]])'  # after punctuation + closing quote/bracket
            r'|(?<=[。!?!?;;])(?=[^”’"\')】》〕〉)\]])'  # after punctuation not followed by a closer
            r'|(?<=[\.\?!;])(?=\s|$)'  # after ASCII punctuation before whitespace or end
        )

        # Split, drop empty segments and restore the protected phrases.
        sentences = []
        for segment in pattern.split(content):
            segment = segment.strip()
            if not segment:
                continue
            for placeholder, original in placeholder_map.items():
                segment = segment.replace(placeholder, original)
            sentences.append(segment)

        return sentences

    @staticmethod
    def get_top_k_keysentence(content: str, k: int = None) -> list:
        """Return the ``k`` sentences containing the most keywords, in document order.

        :param k: number of sentences to keep; derived from the content when None
        """
        if k is None:
            k = TokenTool.get_leave_setences_from_content_len(content)
        leave_tokens = TokenTool.get_leave_tokens_from_content_len(content)
        filtered_words = TokenTool._content_words(content)
        keywords = set(TokenTool.get_top_k_keywords(''.join(filtered_words), leave_tokens))
        sentence_and_score_list = []
        for index, sentence in enumerate(TokenTool.content_to_sentences(content)):
            score = sum(1 for word in TokenTool.split_words(sentence) if word in keywords)
            sentence_and_score_list.append((index, sentence, score))
        # Rank by keyword score (element 2). Sorting on element 1 ranked the
        # sentences alphabetically by their text instead.
        sentence_and_score_list.sort(key=lambda x: x[2], reverse=True)
        top_k_sentence_and_score_list = sentence_and_score_list[:k]
        top_k_sentence_and_score_list.sort(key=lambda x: x[0])
        return [sentence for index, sentence, score in top_k_sentence_and_score_list]

    # ---------------------------------------------------------- LLM summaries
    @staticmethod
    async def get_abstract_by_llm(content: str, llm: LLM) -> str:
        """Build an abstract of ``content`` by refining it window by window.

        :return: the abstract, or None when an error occurs
        """
        try:
            prompt_template = TokenTool._load_prompts().get('CONTENT_TO_ABSTRACT_PROMPT', '')
            sentences = TokenTool.split_str_with_slide_window(content, llm.max_tokens//3*2)
            abstract = ''
            for sentence in sentences:
                # Two thirds of the budget for new text, one third for the
                # abstract carried over from the previous windows.
                abstract = TokenTool.get_k_tokens_words_from_content(abstract, llm.max_tokens//3)
                sys_call = prompt_template.format(content=sentence, abstract=abstract)
                user_call = '请结合文本和摘要输出新的摘要'
                abstract = await llm.nostream([], sys_call, user_call)
            return abstract
        except Exception as e:
            err = f"[TokenTool] 获取摘要失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    @staticmethod
    async def get_title_by_llm(content: str, llm: LLM) -> str:
        """Generate a title for ``content`` with the LLM.

        :return: the title, or None when an error occurs
        """
        try:
            prompt_template = TokenTool._load_prompts().get('CONTENT_TO_TITLE_PROMPT', '')
            content = TokenTool.get_k_tokens_words_from_content(content, llm.max_tokens)
            sys_call = prompt_template.format(content=content)
            user_call = '请结合文本输出标题'
            title = await llm.nostream([], sys_call, user_call)
            return title
        except Exception as e:
            err = f"[TokenTool] 获取标题失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    # ------------------------------------------------------- LLM-based metrics
    @staticmethod
    async def cal_recall(answer_1: str, answer_2: str, llm: LLM) -> float:
        """Ask the LLM for the similarity of two answers.

        :return: the similarity as a number, or None when it cannot be obtained
        """
        try:
            prompt_template = TokenTool._load_prompts().get('ANSWER_TO_ANSWER_PROMPT', '')
            answer_1 = TokenTool.get_k_tokens_words_from_content(answer_1, llm.max_tokens//2)
            answer_2 = TokenTool.get_k_tokens_words_from_content(answer_2, llm.max_tokens//2)
            sys_call = prompt_template.format(text_1=answer_1, text_2=answer_2)
            user_call = '请输出相似度'
            similarity = await llm.nostream([], sys_call, user_call)
            # Model output is untrusted text: parse the first number in it
            # instead of executing it with eval().
            match = re.search(r'-?\d+(?:\.\d+)?', str(similarity))
            if match is None:
                raise ValueError(f"no number in similarity reply: {similarity!r}")
            return float(match.group())
        except Exception as e:
            err = f"[TokenTool] 计算recall失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    @staticmethod
    async def cal_precision(question: str, content: str, llm: LLM) -> float:
        """Score how much of ``content`` is relevant to ``question``.

        The content is turned into statements by the LLM; the score is the
        percentage of statements the LLM answers YES for.

        :return: a value in [0, 100], or None when an error occurs
        """
        try:
            prompt_dict = TokenTool._load_prompts()
            prompt_template = prompt_dict.get('CONTENT_TO_STATEMENTS_PROMPT', '')
            content = TokenTool.compress_tokens(content, llm.max_tokens)
            sys_call = prompt_template.format(content=content)
            user_call = '请结合文本输出陈诉列表'
            statements = await llm.nostream([], sys_call, user_call)
            statements = json.loads(statements)
            if len(statements) == 0:
                return 0
            score = 0
            prompt_template = prompt_dict.get('STATEMENTS_TO_QUESTION_PROMPT', '')
            for statement in statements:
                statement = TokenTool.get_k_tokens_words_from_content(statement, llm.max_tokens)
                sys_call = prompt_template.format(statement=statement, question=question)
                user_call = '请结合文本输出YES或NO'
                yn = await llm.nostream([], sys_call, user_call)
                # Tolerate surrounding whitespace/newlines in the reply.
                if yn.strip().lower() == 'yes':
                    score += 1
            return score/len(statements)*100
        except Exception as e:
            err = f"[TokenTool] 计算precision失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    @staticmethod
    async def cal_faithfulness(question: str, answer: str, content: str, llm: LLM) -> float:
        """Score how well the answer is supported by ``content``.

        The question/answer pair is turned into statements by the LLM; the
        score is the percentage of statements the LLM answers YES for when
        checked against the content.

        :return: a value in [0, 100], or None when an error occurs
        """
        try:
            prompt_dict = TokenTool._load_prompts()
            prompt_template = prompt_dict.get('QA_TO_STATEMENTS_PROMPT', '')
            question = TokenTool.get_k_tokens_words_from_content(question, llm.max_tokens//8)
            answer = TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens//8*7)
            sys_call = prompt_template.format(question=question, answer=answer)
            user_call = '请结合问题和答案输出陈诉'
            statements = await llm.nostream([], sys_call, user_call)
            prompt_template = prompt_dict.get('STATEMENTS_TO_FRAGMENT_PROMPT', '')
            statements = json.loads(statements)
            if len(statements) == 0:
                return 0
            score = 0
            content = TokenTool.compress_tokens(content, llm.max_tokens//8*7)
            for statement in statements:
                statement = TokenTool.get_k_tokens_words_from_content(statement, llm.max_tokens//8)
                sys_call = prompt_template.format(statement=statement, fragment=content)
                user_call = '请输出YES或NO'
                yn = await llm.nostream([], sys_call, user_call)
                if yn.strip().lower() == 'yes':
                    score += 1
            return score/len(statements)*100
        except Exception as e:
            err = f"[TokenTool] 计算faithfulness失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    @staticmethod
    def cosine_distance_numpy(vector1, vector2):
        """Return the cosine distance (1 - cosine similarity) of two vectors.

        A zero-length vector has no direction; the similarity is then taken as
        0 (distance 1) instead of dividing by zero.
        """
        dot_product = np.dot(vector1, vector2)
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            return 1.0
        cosine_similarity = dot_product / denominator
        return 1 - cosine_similarity

    @staticmethod
    async def cal_relevance(question: str, answer: str, llm: LLM) -> float:
        """Score how relevant ``answer`` is to ``question``.

        The LLM generates questions from the answer; the score is the mean
        cosine similarity between those questions and the original question,
        rescaled from [-1, 1] to [0, 100].

        :return: a value in [0, 100], or None when an error occurs
        """
        try:
            prompt_template = TokenTool._load_prompts().get('GENREATE_QUESTION_FROM_CONTENT_PROMPT', '')
            answer = TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens)
            sys_call = prompt_template.format(k=5, content=answer)
            user_call = '请结合文本输出问题列表'
            question_vector = await Embedding.vectorize_embedding(question)
            qs = await llm.nostream([], sys_call, user_call)
            qs = json.loads(qs)
            if len(qs) == 0:
                return 0
            score = 0
            for q in qs:
                q_vector = await Embedding.vectorize_embedding(q)
                # Accumulate similarity, not distance: the rescaling below maps
                # [-1, 1] to [0, 100], which is the similarity range.
                score += 1 - TokenTool.cosine_distance_numpy(question_vector, q_vector)
            return (score/len(qs)+1)/2*100
        except Exception as e:
            err = f"[TokenTool] 计算relevance失败 {e}"
            logging.exception("[TokenTool] %s", err)
            return None

    # ------------------------------------------------------- lexical metrics
    @staticmethod
    def cal_lcs(str1: str, str2: str) -> float:
        """Score two strings by the longest common subsequence of their words.

        Stop words are ignored. The LCS length is divided by the shorter word
        list's length and scaled to 0-100. Two empty inputs score 100; exactly
        one empty input scores 0.
        """
        new_words1 = TokenTool._content_words(str1)
        new_words2 = TokenTool._content_words(str2)
        if len(new_words1) == 0 and len(new_words2) == 0:
            return 100
        if len(new_words1) == 0 or len(new_words2) == 0:
            return 0
        m = len(new_words1)
        n = len(new_words2)
        dp = np.zeros((m+1, n+1))
        for i in range(1, m+1):
            for j in range(1, n+1):
                if new_words1[i-1] == new_words2[j-1]:
                    dp[i][j] = dp[i-1][j-1] + 1
                else:
                    dp[i][j] = max(dp[i-1][j], dp[i][j-1])
        lcs_length = dp[m][n]
        score = lcs_length / min(len(new_words1), len(new_words2)) * 100
        return score

    @staticmethod
    def cal_leve(str1: str, str2: str) -> float:
        """Score two strings by the word-level edit distance.

        Stop words are ignored. The score is ``(1 - distance / longer length)``
        scaled to 0-100. Two empty inputs score 100; exactly one empty input
        scores 0.
        """
        new_words1 = TokenTool._content_words(str1)
        new_words2 = TokenTool._content_words(str2)
        if len(new_words1) == 0 and len(new_words2) == 0:
            return 100
        if len(new_words1) == 0 or len(new_words2) == 0:
            return 0
        m = len(new_words1)
        n = len(new_words2)
        dp = np.zeros((m+1, n+1))
        # Turning a prefix into the empty sequence costs one edit per word.
        for i in range(m+1):
            dp[i][0] = i
        for j in range(n+1):
            dp[0][j] = j
        for i in range(1, m+1):
            for j in range(1, n+1):
                if new_words1[i-1] == new_words2[j-1]:
                    dp[i][j] = dp[i-1][j-1]
                else:
                    dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]+1)
        edit_distance = dp[m][n]
        score = (1 - edit_distance / max(len(new_words1), len(new_words2))) * 100
        return score

    @staticmethod
    def cal_jac(str1: str, str2: str) -> float:
        """Score two strings by the Jaccard similarity of their word sets.

        Stop words are ignored; the result is scaled to 0-100. Two empty inputs
        score 100; exactly one empty input scores 0.
        """
        new_words1 = TokenTool._content_words(str1)
        new_words2 = TokenTool._content_words(str2)
        if len(new_words1) == 0 and len(new_words2) == 0:
            return 100
        if len(new_words1) == 0 or len(new_words2) == 0:
            return 0
        set1 = set(new_words1)
        set2 = set(new_words2)
        intersection = len(set1.intersection(set2))
        union = len(set1.union(set2))
        score = intersection / union * 100
        return score
_, chunk_entity in score_chunk_entities] + return sorted_chunk_entities diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py new file mode 100644 index 0000000..9aec7f8 --- /dev/null +++ b/data_chain/stores/database/database.py @@ -0,0 +1,564 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +from sqlalchemy import event +from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker +from sqlalchemy import Index +from uuid import uuid4 +from data_chain.logger.logger import logger as logging +from pgvector.sqlalchemy import Vector +from sqlalchemy import Boolean, Column, ForeignKey, Integer, Float, String, func +from sqlalchemy.types import TIMESTAMP, UUID +from sqlalchemy.orm import declarative_base + +from data_chain.config.config import config +from data_chain.entities.enum import (Tokenizer, + ParseMethod, + UserStatus, + UserMessageType, + UserMessageStatus, + KnowledgeBaseStatus, + DocParseRelutTopology, + DocumentStatus, + ChunkType, + ChunkParseTopology, + DataSetStatus, + TestingStatus, + SearchMethod, + TaskType, + TaskStatus) + +Base = declarative_base() + + +class TeamEntity(Base): + __tablename__ = 'team' + + id = Column(UUID, default=uuid4, primary_key=True) + author_id = Column(String) + author_name = Column(String) + name = Column(String) + description = Column(String) + member_cnt = Column(Integer, default=0) + is_public = Column(Boolean, default=True) + status = Column(String) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class TeamMessageEntity(Base): + __tablename__ = 'team_message' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID, ForeignKey('team.id')) + author_id = Column(String) + author_name = Column(String) + message = 
Column(String, default='') + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class RoleEntity(Base): + __tablename__ = 'role' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID, ForeignKey('team.id')) + name = Column(String) + is_unique = Column(Boolean, default=False) + editable = Column(Boolean, default=False) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class ActionEntity(Base): + __tablename__ = 'action' + + action = Column(String, primary_key=True) + name = Column(String) + type = Column(String) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class RoleActionEntity(Base): + __tablename__ = 'role_action' + + id = Column(UUID, default=uuid4, primary_key=True) + role_id = Column(UUID, ForeignKey('role.id', ondelete="CASCADE")) + action = Column(String) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class UserEntity(Base): + __tablename__ = 'users' + + id = Column(String, primary_key=True) + name = Column(String) + status = Column(String, default=UserStatus.ACTIVE.value) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + 
TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class UserMessageEntity(Base): + __tablename__ = 'user_message' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID) + sender_id = Column(String) + sender_name = Column(String) + receiver_id = Column(String) + receiver_name = Column(String) + message = Column(String) + type = Column(String) + status = Column(String, default=UserMessageStatus.UNREAD.value) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + + +class TeamUserEntity(Base): + __tablename__ = 'team_user' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID, ForeignKey('team.id', ondelete="CASCADE")) # 团队id + user_id = Column(String) # 用户id + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class UserRoleEntity(Base): + __tablename__ = 'user_role' + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID, ForeignKey('team.id', ondelete="CASCADE")) # 团队id + user_id = Column(String) # 用户id + role_id = Column(UUID) # 角色id + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class KnowledgeBaseEntity(Base): + __tablename__ = 'knowledge_base' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID, ForeignKey('team.id', ondelete="CASCADE")) # 团队id + author_id = Column(String) # 作者id + author_name = Column(String) # 作者名称 + name = Column(String, default='') # 知识库名资产名 + tokenizer = Column(String, default=Tokenizer.ZH.value) # 分词器 + 
description = Column(String, default='') # 资产描述 + embedding_model = Column(String) # 资产向量化模型 + doc_cnt = Column(Integer, default=0) # 资产文档个数 + doc_size = Column(Integer, default=0) # 资产下所有文档大小(TODO: 单位kb或者字节) + upload_count_limit = Column(Integer, default=128) # 更新次数限制 + upload_size_limit = Column(Integer, default=512) # 更新大小限制 + default_parse_method = Column(String, default=ParseMethod.GENERAL.value) # 默认解析方法 + default_chunk_size = Column(Integer, default=1024) # 默认分块大小 + status = Column(String, default=KnowledgeBaseStatus.IDLE.value) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class DocumentTypeEntity(Base): + __tablename__ = 'document_type' + + id = Column(UUID, default=uuid4, primary_key=True) + kb_id = Column(UUID, ForeignKey('knowledge_base.id', ondelete="CASCADE"), nullable=True) + name = Column(String) + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class DocumentEntity(Base): + __tablename__ = 'document' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID) # 文档所属团队id + kb_id = Column(UUID, ForeignKey('knowledge_base.id', ondelete="CASCADE")) # 文档所属资产id + author_id = Column(String) # 文档作者id + author_name = Column(String) # 文档作者名称 + name = Column(String) # 文档名 + extension = Column(String) # 文件后缀 + size = Column(Integer) # 文档大小 + parse_method = Column(String, default=ParseMethod.GENERAL.value) # 文档解析方法 + parse_relut_topology = Column(String, default=DocParseRelutTopology.LIST.value) # 文档解析结果拓扑结构 + chunk_size = Column(Integer) # 文档分块大小 + type_id = Column(UUID) # 文档类别 + enabled = Column(Boolean) # 文档是否启用 + status = 
Column(String, default=DocumentStatus.IDLE.value) # 文档状态 + full_text = Column(String) # 文档全文 + abstract = Column(String) # 文档摘要 + abstract_vector = Column(Vector(1024)) # 文档摘要向量 + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + __table_args__ = ( + Index( + 'abstract_vector_index', + abstract_vector, + postgresql_using='hnsw', + postgresql_with={'m': 16, 'ef_construction': 200}, + postgresql_ops={'abstract_vector': 'vector_cosine_ops'} + ), + ) + + +class ChunkEntity(Base): + __tablename__ = 'chunk' + + id = Column(UUID, default=uuid4, primary_key=True) # chunk id + team_id = Column(UUID) # 团队id + kb_id = Column(UUID) # 知识库id + doc_id = Column(UUID, ForeignKey('document.id', ondelete="CASCADE")) # 片段所属文档id + text = Column(String) # 片段文本内容 + text_vector = Column(Vector(1024)) # 文本向量 + tokens = Column(Integer) # 片段文本token数 + type = Column(String, default=ChunkType.TEXT.value) # 片段类型 + # 前一个chunk的id(假如解析结果为链表,那么这里是前一个节点的id,如果文档解析结果为树,那么这里是父节点的id) + pre_id_in_parse_topology = Column(UUID) + # chunk的在解析结果中的拓扑类型(假如解析结果为链表,那么这里为链表头、中间和尾;假如解析结果为树,那么这里为树根、树的中间节点和叶子节点) + parse_topology_type = Column(String, default=ChunkParseTopology.LISTHEAD.value) + global_offset = Column(Integer) # chunk在文档中的相对偏移 + local_offset = Column(Integer) # chunk在块中的相对偏移 + enabled = Column(Boolean) # chunk是否启用 + status = Column(String) # chunk状态 + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp()) + + __table_args__ = ( + Index( + 'text_vector_index', + text_vector, + postgresql_using='hnsw', + postgresql_with={'m': 16, 'ef_construction': 200}, + postgresql_ops={'text_vector': 'vector_cosine_ops'} + ), + ) 
+ + +class ImageEntity(Base): + __tablename__ = 'image' + id = Column(UUID, default=uuid4, primary_key=True) # 图片id + team_id = Column(UUID) # 团队id + doc_id = Column(UUID) # 图片所属文档id + chunk_id = Column(UUID) # 图片所属chunk的id + extension = Column(String) # 图片后缀 + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class DataSetEntity(Base): + __tablename__ = 'dataset' + + id = Column(UUID, default=uuid4, primary_key=True) # 数据集id + team_id = Column(UUID) # 数据集所属团队id + kb_id = Column(UUID, ForeignKey('knowledge_base.id', ondelete="CASCADE")) # 数据集所属资产id + author_id = Column(String) # 数据的创建者id + author_name = Column(String) # 数据的创建者名称 + llm_id = Column(String) # 数据的生成使用的大模型的id + name = Column(String, nullable=False) # 数据集名称 + description = Column(String) # 数据集描述 + data_cnt = Column(Integer) # 数据集数据量 + is_data_cleared = Column(Boolean, default=False) # 数据集是否清洗 + is_chunk_related = Column(Boolean, default=False) # 数据集是否关联上下文 + status = Column(String, default=DataSetStatus.IDLE) # 数据集状态 + score = Column(Float) # 数据集得分 + created_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_at = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class DataSetDocEntity(Base): + __tablename__ = 'dataset_doc' + + id = Column(UUID, default=uuid4, primary_key=True) # 数据集文档id + dataset_id = Column(UUID, ForeignKey('dataset.id', ondelete="CASCADE")) # 数据集id + doc_id = Column(UUID) # 文档id + created_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_at = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class QAEntity(Base): + 
__tablename__ = 'qa' + + id = Column(UUID, default=uuid4, primary_key=True) # 数据id + dataset_id = Column(UUID, ForeignKey('dataset.id', ondelete="CASCADE")) # 数据所属数据集id + doc_id = Column(UUID) # 数据关联的文档id + doc_name = Column(String) # 数据关联的文档名称 + question = Column(String) # 数据的问题 + answer = Column(String) # 数据的答案 + chunk = Column(String) # 数据的片段 + status = Column(String) + created_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_at = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class TestingEntity(Base): + __tablename__ = 'testing' + + id = Column(UUID, default=uuid4, primary_key=True) # 测试任务的id + team_id = Column(UUID) # 测试任务所属团队id + kb_id = Column(UUID) # 测试任务所属资产id + dataset_id = Column(UUID, ForeignKey('dataset.id', ondelete="CASCADE")) # 测试任务使用数据集的id + author_id = Column(String) # 测试任务的创建者id + author_name = Column(String) # 测试任务的创建者名称 + name = Column(String) # 测试任务的名称 + description = Column(String) # 测试任务的描述 + llm_id = Column(String) # 测试任务的使用的大模型 + search_method = Column(String, default=SearchMethod.KEYWORD_EMBEDDING.value) # 测试任务的使用的检索增强模式类型 + status = Column(String, default=TestingStatus.IDLE.value) # 测试任务的状态 + ave_score = Column(Float) # 测试任务的综合得分 + ave_pre = Column(Float) # 测试任务的平均召回率 + ave_rec = Column(Float) # 测试任务的平均精确率 + ave_fai = Column(Float) # 测试任务的平均忠实值 + ave_rel = Column(Float) # 测试任务的平均可解释性 + ave_lcs = Column(Float) # 测试任务的平均最长公共子序列得分 + ave_leve = Column(Float) # 测试任务的平均编辑距离得分 + ave_jac = Column(Float) # 测试任务的平均杰卡德相似系数 + created_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_at = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class TestCaseEntity(Base): + __tablename__ = 'testcase' + + id = Column(UUID, default=uuid4, primary_key=True) # 测试case的id + testing_id = 
Column(UUID, ForeignKey('testing.id', ondelete="CASCADE")) # 测试 + question = Column(String) # 数据的问题 + answer = Column(String) # 数据的答案 + chunk = Column(String) # 数据的片段 + llm_answer = Column(String) # 测试答案 + related_chunk = Column(String) # 测试关联到的chunk + doc_name = Column(String) # 测试关联的文档名称 + score = Column(Float) # 测试得分 + pre = Column(Float) # 召回率 + rec = Column(Float) # 精确率 + fai = Column(Float) # 忠实值 + rel = Column(Float) # 可解释性 + lcs = Column(Float) # 最长公共子序列得分 + leve = Column(Float) # 编辑距离得分 + jac = Column(Float) # 杰卡德相似系数 + status = Column(String) # 测试状态 + created_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_at = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class TaskEntity(Base): + __tablename__ = 'task' + + id = Column(UUID, default=uuid4, primary_key=True) + team_id = Column(UUID) # 团队id + user_id = Column(String, ForeignKey('users.id', ondelete="CASCADE")) # 创建者id + op_id = Column(UUID) # 任务关联的实体id, 资产或者文档id + op_name = Column(String) # 任务关联的实体名称 + type = Column(String) # 任务类型 + retry = Column(Integer) # 重试次数 + status = Column(String) # 任务状态 + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class TaskReportEntity(Base): + __tablename__ = 'task_report' + + id = Column(UUID, default=uuid4, primary_key=True) # 任务报告的id + task_id = Column(UUID, ForeignKey('task.id', ondelete="CASCADE")) # 任务id + message = Column(String) # 任务报告信息 + current_stage = Column(Integer) # 任务当前阶段 + stage_cnt = Column(Integer) # 任务总的阶段 + created_time = Column( + TIMESTAMP(timezone=True), + nullable=True, + server_default=func.current_timestamp() + ) + updated_time = Column( + TIMESTAMP(timezone=True), + 
server_default=func.current_timestamp(), + onupdate=func.current_timestamp() + ) + + +class DataBase: + engine = None + + @classmethod + async def init_all_table(cls): + if DataBase.engine is None: + DataBase.engine = create_async_engine( + config['DATABASE_URL'], + echo=False, + pool_recycle=300, + pool_pre_ping=True + ) + if 'opengauss' in config['DATABASE_URL']: + from opengauss_sqlalchemy.register_async import register_vector + + @event.listens_for(DataBase.engine.sync_engine, "connect") + def connect(dbapi_connection, connection_record): + dbapi_connection.run_async(register_vector) + async with DataBase.engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + @classmethod + async def get_session(cls): + connection = async_sessionmaker(DataBase.engine, expire_on_commit=False)() + return cls._ConnectionManager(connection) + + class _ConnectionManager: + def __init__(self, connection): + self.connection = connection + + async def __aenter__(self): + return self.connection + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.connection.close() diff --git a/data_chain/stores/minio/minio.py b/data_chain/stores/minio/minio.py index ab35459..a9ac1db 100644 --- a/data_chain/stores/minio/minio.py +++ b/data_chain/stores/minio/minio.py @@ -1,30 +1,46 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
+import asyncio from datetime import timedelta from data_chain.logger.logger import logger as logging import concurrent from minio import Minio -from data_chain.models.constant import OssConstant +from data_chain.entities.common import ( + DOC_PATH_IN_MINIO, + EXPORT_KB_PATH_IN_MINIO, + IMPORT_KB_PATH_IN_MINIO, + EXPORT_DATASET_PATH_IN_MINIO, + IMPORT_DATASET_PATH_IN_MINIO, + TESTING_REPORT_PATH_IN_MINIO +) from data_chain.config.config import config - - class MinIO(): client = Minio( endpoint=config['MINIO_ENDPOINT'], access_key=config['MINIO_ACCESS_KEY'], secret_key=config['MINIO_SECRET_KEY'], secure=config['MINIO_SECURE']) - found = client.bucket_exists(OssConstant.MINIO_BUCKET_DOCUMENT) + + found = client.bucket_exists(DOC_PATH_IN_MINIO) + if not found: + client.make_bucket(DOC_PATH_IN_MINIO) + found = client.bucket_exists(EXPORT_KB_PATH_IN_MINIO) if not found: - client.make_bucket(OssConstant.MINIO_BUCKET_DOCUMENT) - found = client.bucket_exists(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE) + client.make_bucket(EXPORT_KB_PATH_IN_MINIO) + found = client.bucket_exists(IMPORT_KB_PATH_IN_MINIO) if not found: - client.make_bucket(OssConstant.MINIO_BUCKET_KNOWLEDGEBASE) - found = client.bucket_exists(OssConstant.MINIO_BUCKET_PICTURE) + client.make_bucket(IMPORT_KB_PATH_IN_MINIO) + found = client.bucket_exists(EXPORT_DATASET_PATH_IN_MINIO) if not found: - client.make_bucket(OssConstant.MINIO_BUCKET_PICTURE) + client.make_bucket(EXPORT_DATASET_PATH_IN_MINIO) + found = client.bucket_exists(IMPORT_DATASET_PATH_IN_MINIO) + if not found: + client.make_bucket(IMPORT_DATASET_PATH_IN_MINIO) + found = client.bucket_exists(TESTING_REPORT_PATH_IN_MINIO) + if not found: + client.make_bucket(TESTING_REPORT_PATH_IN_MINIO) @staticmethod async def put_object(bucket_name: str, file_index: str, file_path: str): @@ -35,13 +51,11 @@ class MinIO(): @params file_path: 上传文件目录, 绝对路径 """ try: - with concurrent.futures.ThreadPoolExecutor() as executor: - 
future=executor.submit(MinIO.client.fput_object, bucket_name, file_index, file_path) - future.result() + await asyncio.to_thread(MinIO.client.fput_object, bucket_name, file_index, file_path) return True except Exception as e: - logging.error("Put object={} into bucket={} error: {}".format( - file_index, bucket_name, e)) + err = f"上传文件 {file_index} 到桶 {bucket_name} 失败: {e}" + logging.error("[MinIO] %s", err) return False @staticmethod @@ -53,9 +67,11 @@ class MinIO(): """ try: MinIO.client.remove_object(bucket_name=bucket_name, object_name=file_index) + return True except Exception as e: - logging.error("Delete object={} from bucket={} error: {}".format( - file_index, bucket_name, e)) + err = f"删除文件 {file_index} 在桶 {bucket_name} 失败: {e}" + logging.error("[MinIO] %s", err) + return False @staticmethod async def download_object(bucket_name: str, file_index: str, file_path: str): @@ -66,13 +82,11 @@ class MinIO(): @params file_path: 下载指定目录, 绝对路径 """ try: - with concurrent.futures.ThreadPoolExecutor() as executor: - future=executor.submit(MinIO.client.fget_object, bucket_name, file_index, file_path) - future.result() + await asyncio.to_thread(MinIO.client.fget_object, bucket_name, file_index, file_path) return True except Exception as e: - logging.error("Download object={} from bucket={} error: {}".format( - file_path, bucket_name, e)) + err = f"下载文件 {file_index} 在桶 {bucket_name} 失败: {e}" + logging.error("[MinIO] %s", err) return False @staticmethod @@ -86,6 +100,6 @@ class MinIO(): try: return MinIO.client.presigned_get_object(bucket_name=bucket_name, object_name=file_name, expires=expires) except Exception as e: - logging.error("Generate object={} download link from bucket={} error: {}".format( - file_name, bucket_name, e)) - return "" + err = f"生成文件 {file_name} 在桶 {bucket_name} 的下载链接失败: {e}" + logging.error("[MinIO] %s", err) + return "" diff --git a/data_chain/stores/mongodb/mongodb.py b/data_chain/stores/mongodb/mongodb.py new file mode 100644 index 
0000000..0292264 --- /dev/null +++ b/data_chain/stores/mongodb/mongodb.py @@ -0,0 +1,74 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + +from __future__ import annotations +from pydantic import BaseModel, Field +from datetime import datetime +from pymongo import AsyncMongoClient +from typing import TYPE_CHECKING +import uuid + +from data_chain.config.config import config +from data_chain.logger.logger import logger as logging + + +class Session(BaseModel): + """ + Session + + collection: session + """ + + id: str = Field(alias="_id") + ip: str + user_sub: str | None = None + nonce: str | None = None + expired_at: datetime + + +class Task(BaseModel): + """ + collection: witchiand_task + """ + + task_id: uuid.UUID = Field(alias="_id") + status: str + created_time: datetime = Field(default_factory=datetime.now) + + +if TYPE_CHECKING: + from pymongo.asynchronous.client_session import AsyncClientSession + from pymongo.asynchronous.collection import AsyncCollection + + +class MongoDB: + """MongoDB连接""" + + user = config['MONGODB_USER'] + password = config['MONGODB_PASSWORD'] + host = config['MONGODB_HOST'] + port = config['MONGODB_PORT'] + _client: AsyncMongoClient = AsyncMongoClient( + f"mongodb://{user}:{password}@{host}:{port}/?directConnection=true&replicaSet=rs0", + ) + + @classmethod + def get_collection(cls, collection_name: str) -> AsyncCollection: + """获取MongoDB集合(表)""" + try: + return cls._client[config['MONGODB_DATABASE']][collection_name] + except Exception as e: + logger.exception("[MongoDB] 获取集合 %s 失败", collection_name) + raise RuntimeError(str(e)) from e + + @classmethod + async def clear_collection(cls, collection_name: str) -> None: + """清空MongoDB集合(表)""" + try: + await cls._client[config['MONGODB_DATABASE']][collection_name].delete_many({}) + except Exception: + logger.exception("[MongoDB] 清空集合 %s 失败", collection_name) + + @classmethod + def get_session(cls) -> AsyncClientSession: + """获取MongoDB会话""" + return 
cls._client.start_session() diff --git a/data_chain/stores/postgres/postgres.py b/data_chain/stores/postgres/postgres.py deleted file mode 100644 index 1e71f91..0000000 --- a/data_chain/stores/postgres/postgres.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -from sqlalchemy import event -from opengauss_sqlalchemy.register_async import register_vector -from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker -from sqlalchemy import create_engine, Index -from uuid import uuid4 -from typing import List -from data_chain.logger.logger import logger as logging -import time -from pgvector.sqlalchemy import Vector -from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, CheckConstraint, func, MetaData, Table -from sqlalchemy.types import TIMESTAMP, UUID -from sqlalchemy.orm import declarative_base, relationship - -from data_chain.config.config import config -from data_chain.models.api import CreateKnowledgeBaseRequest -from data_chain.models.constant import KnowledgeStatusEnum, ParseMethodEnum - -Base = declarative_base() - - -class User(Base): - __tablename__ = 'users' - - id = Column(UUID, default=uuid4, primary_key=True) # 用户id - account = Column(String, unique=True) # 用户账号 - email = Column(String, unique=True) # 用户邮箱 - passwd = Column(String) - name = Column(String) - language = Column(String, default='zh') - role = Column(String, default='') - status = Column(String, default='activate') - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - __table_args__ = ( - CheckConstraint("language IN ('en', 'zh')", name='valid_language'), - ) - - -class ModelEntity(Base): - __tablename__ = 'model' - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID, ForeignKey('users.id', 
ondelete="CASCADE")) - is_online = Column(Boolean, default=False) - model_name = Column(String) - model_type = Column(String) - openai_api_base = Column(String) - encrypted_openai_api_key = Column(String) - encrypted_config = Column(String) - max_tokens = Column(Integer) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - -class KnowledgeBaseEntity(Base): - __tablename__ = 'knowledge_base' - - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID, ForeignKey('users.id', ondelete="CASCADE")) # 用户id - name = Column(String, default='') # 知识库名资产名 - language = Column(String, default='zh') # 资产文档语言 - description = Column(String, default='') # 资产描述 - embedding_model = Column(String) # 资产向量化模型 - document_number = Column(Integer, default=0) # 资产文档个数 - document_size = Column(Integer, default=0) # 资产下所有文档大小(TODO: 单位kb或者字节) - default_parser_method = Column(String, default=ParseMethodEnum.GENERAL) # 默认解析方法 - default_chunk_size = Column(Integer, default=1024) # 默认分块大小 - vector_items_id = Column(UUID, default=uuid4) # 向量表id - status = Column(String, default=KnowledgeStatusEnum.IDLE) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - def keys(self): - return [key for key in self.__dict__ if getattr(self, key) is not None] - - def __getitem__(self, item): - return getattr(self, item) - - __table_args__ = ( - CheckConstraint("language IN ('en', 'zh')", name='valid_language'), - ) - - # TODO: 放到convertor里面 - @staticmethod - def from_create_request(request: CreateKnowledgeBaseRequest, types: List[str] = []): - return KnowledgeBaseEntity( - name=request.name, - language=request.language, - 
description=request.description, - embedding_model=request.embedding_model, - document_number=0, - document_size=0, - default_parser_method=request.default_parser_method, - default_chunk_size=request.default_chunk_size) - - -class DocumentTypeEntity(Base): - __tablename__ = 'document_type' - - id = Column(UUID, default=uuid4, primary_key=True) - kb_id = Column(UUID, ForeignKey('knowledge_base.id', ondelete="CASCADE"), nullable=True) - type = Column(String) - - -class DocumentEntity(Base): - __tablename__ = 'document' - - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID) # 用户id - kb_id = Column(UUID, ForeignKey('knowledge_base.id', ondelete="CASCADE")) # 文档所属资产id - name = Column(String) # 文档名 - extension = Column(String) # 文件后缀 - size = Column(Integer) # 文档大小 (TODO: 单位kb或者字节) - parser_method = Column(String) # 文档解析方法(类似于embedding或者nlp之类的) - type_id = Column(UUID) # 文档所属领域 - chunk_size = Column(Integer) # 文档分块大小 - enabled = Column(Boolean) # 文档是否启用 - status = Column(String) - full_text = Column(String) #文档全文 - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - def keys(self): - return ['id', 'user_id', 'kb_id', 'name', 'extension', 'size', 'parser_method', 'type_id', 'chunk_size', 'enabled', 'status', 'created_time', 'updated_time'] - - def __getitem__(self, item): - return getattr(self, item) - - -class ImageEntity(Base): - __tablename__ = 'image' - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID) - document_id = Column(UUID) - chunk_id = Column(UUID, ForeignKey('chunk.id', ondelete="CASCADE")) - extension = Column(String) # 文件后缀 - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), 
- onupdate=func.current_timestamp()) - - -class ChunkEntity(Base): - __tablename__ = 'chunk' - - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID) # 用户id - kb_id = Column(UUID) # 用户id - document_id = Column(UUID, ForeignKey('document.id', ondelete="CASCADE")) # 片段所属文档id - text = Column(String) # 片段文本内容 - tokens = Column(Integer) # 片段文本token数 - type = Column(String) # chunk类型, 例如是text还是边还是节点 - global_offset = Column(Integer) # chunk在文档中的相对偏移 - local_offset = Column(Integer) # chunk在块中的相对偏移 - enabled = Column(Boolean) # chunk是否启用 - status = Column(String) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - -class ChunkLinkEntity(Base): - __tablename__ = 'chunk_link' - - id = Column(UUID, default=uuid4, primary_key=True) - chunk_a_id = Column(UUID, ForeignKey('chunk.id', ondelete="CASCADE")) - chunk_b_id = Column(UUID) - type = Column(String) # link类型(例如是上下文, 或者节点之间的关系) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - -class TaskEntity(Base): - __tablename__ = 'task' - - id = Column(UUID, default=uuid4, primary_key=True) - user_id = Column(UUID, ForeignKey('users.id', ondelete="CASCADE")) # 用户id - op_id = Column(UUID) # TODO: 具体名字待定, 任务关联的业务id, 资产或者文档id - type = Column(String) - retry = Column(Integer) - status = Column(String) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - -class TaskStatusReportEntity(Base): - # TODO: 待补充 - __tablename__ = 'task_report' - - id = 
Column(UUID, default=uuid4, primary_key=True) - task_id = Column(UUID, ForeignKey('task.id', ondelete="CASCADE")) - message = Column(String) - current_stage = Column(Integer) - stage_cnt = Column(Integer) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_time = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - - -class TemporaryDocumentEntity(Base): - __tablename__ = 'temporary_document' - id = Column(UUID, default=uuid4, primary_key=True) - name = Column(String) - extension = Column(String) - bucket_name = Column(String) - parser_method = Column(String) - chunk_size = Column(Integer) # 文档分块大小 - full_text = Column(String) #文档全文 - status = Column(String) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - - -class TemporaryChunkEntity(Base): - __tablename__ = 'temporary_chunk' - id = Column(UUID, default=uuid4, primary_key=True) - document_id = Column(UUID, ForeignKey('temporary_document.id', ondelete="CASCADE")) - text = Column(String) - tokens = Column(Integer) - type = Column(String) - global_offset = Column(Integer) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - - -class TemporaryVectorItemstEntity(Base): - __tablename__ = 'temporary_vector_items' - id = Column(UUID, default=uuid4, primary_key=True) - document_id = Column(UUID) - chunk_id = Column(UUID, ForeignKey('temporary_chunk.id', ondelete="CASCADE")) - vector = Column(Vector(1024)) - created_time = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) -# 反射加载 chunk 表的元数据 - - -def reflect_chunk_table(engine): - metadata = MetaData() - metadata.reflect(bind=engine, only=['chunk']) - return metadata.tables['chunk'] - - -class PostgresDB: - - @classmethod - async def init_all_table(cls): - engine = create_async_engine( - 
config['DATABASE_URL'], - echo=False, - pool_recycle=300, - pool_pre_ping=True) - async with engine.begin() as conn: - await conn.run_sync(Base.metadata.create_all) - await engine.dispose() - - @classmethod - async def get_dynamic_vector_items_table(cls, uuid_str, vector_dim): - # 使用同步引擎 - if config['DATABASE_TYPE'] == 'postgres': - sync_engine = create_engine( - config['DATABASE_URL'].replace("postgresql+asyncpg", "postgresql"), - echo=False, pool_recycle=300, pool_pre_ping=True) - elif config['DATABASE_TYPE'] == 'opengauss': - sync_engine = create_engine( - config['DATABASE_URL'].replace("opengauss+asyncpg", "opengauss+psycopg2"), - echo=False, pool_recycle=300, pool_pre_ping=True) - # 反射加载 chunk 表的元数据 - chunk_table = reflect_chunk_table(sync_engine) - - metadata = MetaData() - - # 动态创建表 - vector_items_table = Table( - f'vector_items_{uuid_str}', metadata, - Column('id', UUID, default=uuid4, primary_key=True), - Column('user_id', UUID), # 用户id - Column('chunk_id', UUID, ForeignKey(chunk_table.c.id, ondelete="CASCADE")), - Column('kb_id', UUID), - Column('document_id', UUID), - Column('vector', Vector(vector_dim)), # 替代具体的向量存储方式 - Column('enabled', Boolean), # vector对应的chunk是否启用 - Column('created_time', TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()), - Column('updated_time', TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()), - ) - - # 动态创建索引 - if config['DATABASE_TYPE']=='postgres': - index = Index( - f'general_text_vector_index_{uuid_str}', - vector_items_table.c.vector, - postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 200}, - postgresql_ops={'vector': 'vector_cosine_ops'} - ) - elif config['DATABASE_TYPE']=='opengauss': - index = Index( - f'general_text_vector_index_{uuid_str}', - vector_items_table.c.vector, - opengauss_using='hnsw', - opengauss_with={'m': 16, 'ef_construction': 200}, - opengauss_ops={'vector': 'vector_cosine_ops'} - ) - - # 
将索引添加到表定义中 - vector_items_table.append_constraint(index) - - # 创建表 - with sync_engine.begin() as conn: - metadata.create_all(conn) - - return vector_items_table - - @classmethod - async def create_table(cls, table: Table): - engine = create_async_engine( - config['DATABASE_URL'], - echo=False, - pool_recycle=300, - pool_pre_ping=True) - async with engine.begin() as conn: - await conn.run_sync(lambda conn: table.metadata.create_all(conn, tables=[table])) - await engine.dispose() - - @classmethod - async def drop_table(cls, table: Table): - engine = create_async_engine( - config['DATABASE_URL'], - echo=False, - pool_recycle=300, - pool_pre_ping=True) - async with engine.begin() as conn: - await conn.run_sync(lambda conn: table.metadata.drop_all(conn, tables=[table])) - await engine.dispose() - - @classmethod - async def get_session(cls): - engine = create_async_engine( - config['DATABASE_URL'], - echo=False, - pool_recycle=300, - pool_pre_ping=True) - if config['DATABASE_TYPE'] == 'opengauss': - @event.listens_for(engine.sync_engine, "connect") - def connect(dbapi_connection, connection_record): - dbapi_connection.run_async(register_vector) - connection = async_sessionmaker(engine, expire_on_commit=False)() - return cls._ConnectionManager(engine, connection) - - class _ConnectionManager: - def __init__(self, engine, connection): - self.connection = connection - self.engine = engine - - async def __aenter__(self): - return self.connection - - async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.connection.close() - await self.engine.dispose() diff --git a/data_chain/stores/redis/redis.py b/data_chain/stores/redis/redis.py index a4b9627..20a189a 100644 --- a/data_chain/stores/redis/redis.py +++ b/data_chain/stores/redis/redis.py @@ -1,4 +1,4 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. +# Copyright (c) Huawei Technologies Co., Ltd. 2023-20245. All rights reserved. 
import redis from data_chain.logger.logger import logger as logging @@ -6,9 +6,6 @@ from data_chain.logger.logger import logger as logging from data_chain.config.config import config - - - class RedisConnectionPool: @classmethod @@ -24,11 +21,11 @@ class RedisConnectionPool: except Exception as e: logging.error(f"Init redis connection failed due to error: {e}") return None - return cls._ConnectionManager(_redis_pool,pool) + return cls._ConnectionManager(_redis_pool, pool) class _ConnectionManager: - def __init__(self,_redis_pool,connection): - self._redis_pool=_redis_pool + def __init__(self, _redis_pool, connection): + self._redis_pool = _redis_pool self.connection = connection def __enter__(self): @@ -39,4 +36,3 @@ class RedisConnectionPool: self.connection.close() except Exception as e: logging.error(f"Redis connection close failed due to error: {e}") - diff --git a/example.html b/example.html new file mode 100644 index 0000000..3c4251f --- /dev/null +++ b/example.html @@ -0,0 +1,113 @@ + + + + + + + 包含多种标签的 HTML 示例 + + + + + +

这是一级标题(h1)

+ + +

这是二级标题(h2)

+ + +

这是三级标题(h3)

+ + +

这是四级标题(h4)

+ + +
这是五级标题(h5)
+ + +
这是六级标题(h6)
+ + +

这是一个普通的段落(p),用于展示文本内容。在这里可以写一些描述性的文字,用来介绍相关的主题或者内容信息。

+ + +

以下是一个简单的代码示例(code):

+ + function add(a, b) { + return a + b; + } + + + +
+

这是一张示例图片(img):

+ 示例图片 +
+ + +

示例表格(table)

+ + + + + + + + + + + + + + + + + + + + +
姓名年龄城市
张三25北京
李四30上海
+ + +

这是一个超链接(a),点击这里 可以访问示例网站。

+ + +
+

这是一个包含在 div 内的标题(h3)

+

这是 div 内的段落内容,div 常用于页面布局和元素分组,可以方便地对一组相关元素进行样式设置和操作。

+
+ + + + \ No newline at end of file diff --git a/example.md b/example.md new file mode 100644 index 0000000..0a85985 --- /dev/null +++ b/example.md @@ -0,0 +1,21 @@ +# Main Title +This is a paragraph. +![Sample Image](https://static.cse.sustech.edu.cn/upload/images/20241021/1729501102223117.jpg) + +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | + +## Subtitle 1 +This is a sub - paragraph. +### Sub - subtitle 1 +Another sub - sub - paragraph. + +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | + +`python +def say_hello(): + print("Hello, world!") +` \ No newline at end of file diff --git a/example.zip b/example.zip new file mode 100644 index 0000000000000000000000000000000000000000..5cf6eb50174f1f89195bbb01a6eb18eaa74a2725 GIT binary patch literal 404 zcmWIWW@Zs#U|`^22#;JAbtW*S_!W@1oRNWni$R7VwIVUMASYEXHzhQLlYx0#tz-rW zmsW5yFtPyEF@TA+{Tuxb8wj+O+aKK;Db#suBV%ZZvrcM4mU+tyQ?rc|rK2s~R|Lt) zxjS%LPiy!8UVrVU{sYcf_Uh?-602Wxd~)r$k$gLO%N*T#I<_8mY0qO5P)l4s3om{Yp&A;;><#g9&(asKu7{q6}l zalZBk@00=yZS shLJmDvNa&%3T!1$#8%QN15C#BgJ& + +

这是一段文本。

+ + + + + +
表格单元格 1表格单元格 2
+ 示例图片 + print('这是一段代码') + 示例链接 + + \ No newline at end of file -- Gitee From 6b9bc5dccc3331a7ef11a9dc3071dafaa0de3bbb Mon Sep 17 00:00:00 2001 From: zxstty Date: Tue, 13 May 2025 12:13:15 +0800 Subject: [PATCH 02/15] =?UTF-8?q?=E5=AE=8C=E5=96=84rag=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../apps/service/acc_testing_service.py | 4 +- data_chain/common/prompt.yaml | 19 +++ data_chain/embedding/embedding.py | 8 +- data_chain/entities/enum.py | 4 +- data_chain/entities/response_data.py | 16 +-- data_chain/manager/chunk_manager.py | 28 +++-- data_chain/manager/document_manager.py | 19 ++- data_chain/parser/tools/token_tool.py | 6 +- data_chain/rag/base_searcher.py | 112 +++++++++++++++++- data_chain/rag/doc2chunk_bfs_searcher.py | 0 data_chain/rag/doc2chunk_searcher.py | 45 +++++++ data_chain/rag/enhanced_by_llm_searcher.py | 68 +++++++++++ data_chain/rag/keyword_and_vector_searcher.py | 40 +++++++ data_chain/rag/keyword_searcher.py | 33 ++++++ data_chain/rag/vector_searcher.py | 34 ++++++ data_chain/stores/mongodb/mongodb.py | 6 +- openapi.json | 2 +- 17 files changed, 403 insertions(+), 41 deletions(-) create mode 100644 data_chain/rag/doc2chunk_bfs_searcher.py create mode 100644 data_chain/rag/doc2chunk_searcher.py create mode 100644 data_chain/rag/enhanced_by_llm_searcher.py create mode 100644 data_chain/rag/keyword_and_vector_searcher.py create mode 100644 data_chain/rag/keyword_searcher.py create mode 100644 data_chain/rag/vector_searcher.py diff --git a/data_chain/apps/service/acc_testing_service.py b/data_chain/apps/service/acc_testing_service.py index 92e8697..4008f2a 100644 --- a/data_chain/apps/service/acc_testing_service.py +++ b/data_chain/apps/service/acc_testing_service.py @@ -155,7 +155,7 @@ class TestingService: raise e @staticmethod - async def run_testing_by_testing_id(testing_id: uuid.UUID, run: bool) -> None: + async def run_testing_by_testing_id(testing_id: uuid.UUID, run: 
bool) -> uuid.UUID: """运行测试""" try: testing_entity = await TestingManager.get_testing_by_testing_id(testing_id) @@ -163,6 +163,7 @@ class TestingService: if testing_entity.status != TestingStatus.IDLE.value: return None task_id = await TaskQueueService.init_task(TaskType.TESTING_RUN.value, testing_entity.id) + return task_id else: task_entity = await TaskManager.get_current_task_by_op_id(testing_id) if not task_entity: @@ -170,6 +171,7 @@ class TestingService: if task_entity.status != TaskStatus.PENDING.value and task_entity.status != TaskStatus.RUNNING.value: return None await TaskQueueService.stop_task(task_entity.id) + return task_entity.id except Exception as e: err = "运行测试失败" logging.exception("[TestingService] %s", err) diff --git a/data_chain/common/prompt.yaml b/data_chain/common/prompt.yaml index c1895ba..c44be25 100644 --- a/data_chain/common/prompt.yaml +++ b/data_chain/common/prompt.yaml @@ -328,4 +328,23 @@ ACC_ANALYSIS_RESULT_MERGE_PROMPT: '你是一个文本分析专家,你的任务 分析结果1:{analysis_result_1} 分析结果2:{analysis_result_2} ' +CHUNK_QUERY_MATCH_PROMPT: '你是一个文本分析专家,你的任务是根据给出的片段和问题判断,片段是否与问题相关 + 注意: + #01 如果片段与问题相关,请输出YES + #02 如果片段与问题不相关,请输出NO + #03 请仅输出YES或NO,不要输出其他内容 + 例子: + 输入1: + 片段:openEuler是一个开源的操作系统。 + 问题:openEuler是什么操作系统? + 输出1:YES + 输入2: + 片段:白马非马 + 问题:openEuler是什么操作系统? 
+ 输出2:NO + + 下面是给出的片段和问题: + 片段:{chunk} + 问题:{question} + " \ No newline at end of file diff --git a/data_chain/embedding/embedding.py b/data_chain/embedding/embedding.py index 66512e1..a0c0e2b 100644 --- a/data_chain/embedding/embedding.py +++ b/data_chain/embedding/embedding.py @@ -11,6 +11,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) class Embedding(): @staticmethod async def vectorize_embedding(text): + vector = None if config['EMBEDDING_TYPE'] == 'openai': headers = { "Authorization": f"Bearer {config['EMBEDDING_API_KEY']}" @@ -24,7 +25,7 @@ class Embedding(): res = requests.post(url=config["EMBEDDING_ENDPOINT"], headers=headers, json=data, verify=False) if res.status_code != 200: return None - return res.json()['data'][0]['embedding'] + vector = res.json()['data'][0]['embedding'] except Exception as e: err = f"[Embedding] 向量化失败 ,error: {e}" logging.exception(err) @@ -37,10 +38,13 @@ class Embedding(): res = requests.post(url=config["EMBEDDING_ENDPOINT"], json=data, verify=False) if res.status_code != 200: return None - return json.loads(res.text)[0] + vector = json.loads(res.text)[0] except Exception as e: err = f"[Embedding] 向量化失败 ,error: {e}" logging.exception(err) return None else: return None + while len(vector) < 1024: + vector.append(0) + return vector[:1024] diff --git a/data_chain/entities/enum.py b/data_chain/entities/enum.py index 838f1a6..323248a 100644 --- a/data_chain/entities/enum.py +++ b/data_chain/entities/enum.py @@ -152,8 +152,8 @@ class TestCaseStatus(str, Enum): class SearchMethod(str, Enum): """搜索方法""" KEYWORD = "keyword" - EMBEDDING = "embedding" - KEYWORD_EMBEDDING = "keyword_embedding" + VECTOR = "vector" + KEYWORD_AND_VECTOR = "keyword_and_vector" DOC2CHUNK = "doc2chunk" DOC2CHUNK_BFS = "doc2chunk_bfs" ENHANCED_BY_LLM = "enhanced_by_llm" diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 03038f8..ddfe55b 100644 --- a/data_chain/entities/response_data.py +++ 
b/data_chain/entities/response_data.py @@ -455,14 +455,14 @@ class TestCase(BaseModel): class TestingTestCase(BaseModel): """GET /testing/testcase 数据结构""" - ave_score: float = Field(description="平均综合得分", alias="aveScore") - ave_pre: float = Field(description="平均精确率", alias="avePre") - ave_rec: float = Field(description="平均召回率", alias="aveRec") - ave_fai: float = Field(description="平均忠实值", alias="aveFai") - ave_rel: float = Field(description="平均可解释性", alias="aveRel") - ave_lcs: float = Field(description="平均最长公共子串得分", alias="aveLcs") - ave_leve: float = Field(description="平均编辑距离得分", alias="aveLeve") - ave_jac: float = Field(description="平均杰卡德相似系数", alias="aveJac") + ave_score: float = Field(default=-1, description="平均综合得分", alias="aveScore") + ave_pre: float = Field(default=-1, description="平均精确率", alias="avePre") + ave_rec: float = Field(default=-1, description="平均召回率", alias="aveRec") + ave_fai: float = Field(default=-1, description="平均忠实值", alias="aveFai") + ave_rel: float = Field(default=-1, description="平均可解释性", alias="aveRel") + ave_lcs: float = Field(default=-1, description="平均最长公共子串得分", alias="aveLcs") + ave_leve: float = Field(default=-1, description="平均编辑距离得分", alias="aveLeve") + ave_jac: float = Field(default=-1, description="平均杰卡德相似系数", alias="aveJac") test_cases: list[TestCase] = Field(default=[], description="测试用例列表", alias="testCases") diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index aceb28a..10270fe 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -132,9 +132,9 @@ class ChunkManager(): raise e @staticmethod - async def get_topk_chunk_by_kb_ids_vector( - kb_id: uuid.UUID, doc_ids: list[uuid.UUID], vector: List[float], - topk: int, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + async def get_top_k_chunk_by_kb_id_vector( + kb_id: uuid.UUID, vector: List[float], + top_k: int, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> 
List[ChunkEntity]: """根据知识库ID和向量查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -143,15 +143,16 @@ class ChunkManager(): .join(DocumentEntity, DocumentEntity.id == ChunkEntity.doc_id ) - .join(DocumentEntity.id.in_(doc_ids)) .where(DocumentEntity.enabled == True) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(ChunkEntity.kb_id == kb_id) .where(ChunkEntity.status != ChunkStatus.DELETED.value) .where(ChunkEntity.id.notin_(banned_id)) .order_by(ChunkEntity.text_vector.cosine_distance(vector).desc()) - .limit(topk) + .limit(top_k) ) + if doc_ids: + stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) result = await session.execute(stmt) chunk_entities = result.scalars().all() return chunk_entities @@ -161,9 +162,9 @@ class ChunkManager(): raise e @staticmethod - async def get_topk_chunk_by_kb_id_keyword( - kb_id: uuid.UUID, doc_ids: list[uuid.UUID], query: str, - topk: int, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + async def get_top_k_chunk_by_kb_id_keyword( + kb_id: uuid.UUID, query: str, + top_k: int, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: """根据知识库ID和关键词查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -181,7 +182,6 @@ class ChunkManager(): .join(DocumentEntity, DocumentEntity.id == ChunkEntity.doc_id ) - .where(DocumentEntity.id.in_(doc_ids)) .where(DocumentEntity.enabled == True) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(ChunkEntity.kb_id == kb_id) @@ -193,8 +193,10 @@ class ChunkManager(): func.plainto_tsquery(tokenizer, query) ).desc() ) - .limit(topk) + .limit(top_k) ) + if doc_ids: + stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) result = await session.execute(stmt) chunk_entities = result.scalars().all() return chunk_entities @@ -206,7 +208,7 @@ class ChunkManager(): @staticmethod async def fetch_surrounding_chunk_by_doc_id_and_global_offset( doc_id: uuid.UUID, global_offset: int, - topk: int = 50) -> 
List[ChunkEntity]: + top_k: int = 50) -> List[ChunkEntity]: """根据文档ID和全局偏移量查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -214,8 +216,8 @@ class ChunkManager(): select(ChunkEntity) .where(and_(ChunkEntity.doc_id == doc_id, ChunkEntity.status != ChunkStatus.DELETED.value)) - .where(or_(ChunkEntity.global_offset >= global_offset - topk, - ChunkEntity.global_offset <= global_offset + topk)) + .where(or_(ChunkEntity.global_offset >= global_offset - top_k, + ChunkEntity.global_offset <= global_offset + top_k)) .order_by(ChunkEntity.global_offset) ) result = await session.execute(stmt) diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index 69b7998..39aece1 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -35,20 +35,24 @@ class DocumentManager(): pass @staticmethod - async def get_topk_document_by_kb_id_vector( - kb_id: uuid.UUID, vector: list[float], topk: int) -> List[DocumentEntity]: + async def get_top_k_document_by_kb_id_vector( + kb_id: uuid.UUID, vector: list[float], + top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[DocumentEntity]: """根据知识库ID和向量获取前K个文档""" try: async with await DataBase.get_session() as session: stmt = ( select(DocumentEntity) .where(DocumentEntity.kb_id == kb_id) + .where(DocumentEntity.id.notin_(banned_id)) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(DocumentEntity.enabled == True) .where(DocumentEntity.abstract_vector.cosine_distance(vector).desc()) .order_by(DocumentEntity.abstract_vector.cosine_distance(vector).desc()) - .limit(topk) + .limit(top_k) ) + if doc_ids: + stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) result = await session.execute(stmt) document_entities = result.scalars().all() return document_entities @@ -58,8 +62,8 @@ class DocumentManager(): raise e @staticmethod - async def get_topk_document_by_kb_id_keyword( - kb_id: uuid.UUID, query: 
str, topk: int) -> List[DocumentEntity]: + async def get_top_k_document_by_kb_id_keyword( + kb_id: uuid.UUID, query: str, top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[DocumentEntity]: """根据知识库ID和关键词获取前K个文档""" try: async with await DataBase.get_session() as session: @@ -75,14 +79,17 @@ class DocumentManager(): stmt = ( select(DocumentEntity) .where(DocumentEntity.kb_id == kb_id) + .where(DocumentEntity.id.notin_(banned_id)) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(DocumentEntity.enabled == True) .where(func.ts_rank_cd( func.to_tsvector(tokenizer, DocumentEntity.abstract), func.plainto_tsquery(tokenizer, query) ).desc()) - .limit(topk) + .limit(top_k) ) + if doc_ids: + stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) result = await session.execute(stmt) document_entities = result.scalars().all() return document_entities diff --git a/data_chain/parser/tools/token_tool.py b/data_chain/parser/tools/token_tool.py index ab5af72..a96bcb2 100644 --- a/data_chain/parser/tools/token_tool.py +++ b/data_chain/parser/tools/token_tool.py @@ -139,7 +139,7 @@ class TokenTool: def get_top_k_keywords(content: str, k=10) -> list: try: # 使用jieba提取关键词 - keywords = extract_tags(content, topK=k, withWeight=True) + keywords = extract_tags(content, top_k=k, withWeight=True) return [keyword for keyword, weight in keywords] except Exception as e: err = f"[TokenTool] 获取关键词失败 {e}" @@ -485,6 +485,8 @@ class TokenTool: """ 计算两个字符串的Jaccard相似度 """ + if len(str1) == 0 and len(str2) == 0: + return 100 words1 = TokenTool.split_words(str1) words2 = TokenTool.split_words(str2) new_words1 = [] @@ -495,8 +497,6 @@ class TokenTool: for word in words2: if word not in TokenTool.stopwords: new_words2.append(word) - if len(new_words1) == 0 and len(new_words2) == 0: - return 100 if len(new_words1) == 0 or len(new_words2) == 0: return 0 set1 = set(new_words1) diff --git a/data_chain/rag/base_searcher.py b/data_chain/rag/base_searcher.py 
index ace6839..5d82eda 100644 --- a/data_chain/rag/base_searcher.py +++ b/data_chain/rag/base_searcher.py @@ -1,8 +1,20 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. import uuid from pydantic import BaseModel, Field +import random from data_chain.logger.logger import logger as logging from data_chain.stores.database.database import ChunkEntity from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.chunk_manager import ChunkManager + + +class DocChunk(BaseModel): + """ + 文档块 + """ + doc_id: uuid.UUID = Field(..., description="文档ID") + doc_name: str = Field(..., description="文档名称") + chunks: list[ChunkEntity] = Field(..., description="文档块列表") class BaseSearcher: @@ -15,7 +27,9 @@ class BaseSearcher: return None @staticmethod - async def search(search_method: str, query: str, top_k: int = 5) -> list[ChunkEntity]: + async def search( + search_method: str, kb_id: uuid.UUID, query: str, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_id: list[uuid.UUID] = []) -> list[ChunkEntity]: """ 检索器 :param search_method: 检索器方法 @@ -25,7 +39,9 @@ class BaseSearcher: """ search_class = BaseSearcher.find_worker_class(search_method) if search_class: - return await search_class.search(query, top_k) + return await search_class.search( + query=query, kb_id=kb_id, top_k=top_k, doc_ids=doc_ids, banned_id=banned_id + ) else: err = f"[BaseSearch] 检索器不存在,search_method: {search_method}" logging.exception(err) @@ -46,3 +62,95 @@ class BaseSearcher: score_chunk_entities.sort(key=lambda x: x[0], reverse=True) sorted_chunk_entities = [chunk_entity for _, chunk_entity in score_chunk_entities] return sorted_chunk_entities + + @staticmethod + async def related_surround_chunk(chunk_entity: ChunkEntity, tokens_limit: int = 1024) -> list[ChunkEntity]: + """ + 相关上下文 + :param list: 检索结果 + :param query: 查询 + :return: 相关上下文 + """ + chunk_entities = await 
ChunkManager.fetch_surrounding_chunk_by_doc_id_and_global_offset(chunk_entity.doc_id, chunk_entity.global_offset) + chunk_entity_dict = {} + min_offset = chunk_entity.global_offset + max_offset = chunk_entity.global_offset + for chunk_entity in chunk_entities: + if chunk_entity.global_offset < min_offset: + min_offset = chunk_entity.global_offset + if chunk_entity.global_offset > max_offset: + max_offset = chunk_entity.global_offset + chunk_entity_dict[chunk_entity.global_offset] = chunk_entity + lower = chunk_entity.global_offset-1 + upper = chunk_entity.global_offset+1 + related_chunk_entities = [] + tokens_sub = 0 + find_lower = True + while 1: + if lower < min_offset and upper > max_offset: + break + if tokens_sub < 0: + if lower >= min_offset: + find_lower = True + else: + find_lower = False + elif tokens_sub > 0: + if upper <= max_offset: + find_lower = False + else: + find_lower = True + else: + rd = random.randint(0, 1) + if rd == 0: + if lower >= min_offset: + find_lower = True + else: + find_lower = False + else: + if upper <= max_offset: + find_lower = False + else: + find_lower = True + if find_lower: + if chunk_entity_dict.get(lower) is not None: + tokens_sub += chunk_entity_dict[lower].tokens + related_chunk_entities.append(chunk_entity_dict[lower]) + lower -= 1 + else: + if chunk_entity_dict.get(upper) is not None: + tokens_sub += chunk_entity_dict[upper].tokens + related_chunk_entities.append(chunk_entity_dict[upper]) + upper += 1 + return related_chunk_entities + + @staticmethod + async def unique_chunk(chunk_entities: list[ChunkEntity]) -> list[ChunkEntity]: + """ + 去重 + :param list: 检索结果 + :return: 去重后的结果 + """ + unique_chunk_entities = [] + chunk_entity_dict = {} + for chunk_entity in chunk_entities: + if chunk_entity.doc_id not in chunk_entity_dict: + chunk_entity_dict[chunk_entity.doc_id] = chunk_entity + unique_chunk_entities.append(chunk_entity) + return unique_chunk_entities + + @staticmethod + async def classify_by_doc_id(chunk_entities: 
list[ChunkEntity]) -> list[DocChunk]: + """ + 按照文档ID分类 + :param list: 检索结果 + :return: 分类后的结果 + """ + doc_chunk_dict = {} + for chunk_entity in chunk_entities: + if chunk_entity.doc_id not in doc_chunk_dict: + doc_chunk_dict[chunk_entity.doc_id] = DocChunk( + doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[]) + doc_chunk_dict[chunk_entity.doc_id].chunks.append(chunk_entity) + for doc_chunk in doc_chunk_dict.values(): + doc_chunk.chunks = sorted(doc_chunk.chunks, key=lambda x: x.global_offset) + return list(doc_chunk_dict.values()) diff --git a/data_chain/rag/doc2chunk_bfs_searcher.py b/data_chain/rag/doc2chunk_bfs_searcher.py new file mode 100644 index 0000000..e69de29 diff --git a/data_chain/rag/doc2chunk_searcher.py b/data_chain/rag/doc2chunk_searcher.py new file mode 100644 index 0000000..d42899c --- /dev/null +++ b/data_chain/rag/doc2chunk_searcher.py @@ -0,0 +1,45 @@ +import uuid +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod + + +class Doc2Chunk(BaseModel): + """ + 关键词向量检索 + """ + name = SearchMethod.DOC2CHUNK.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], + banned_id=list[uuid.UUID] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 检索结果 + """ + vector = await Embedding.vectorize_embedding(query) + try: + doc_entities_keyword = await DocumentManager.get_top_k_document_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_id) + use_doc_ids = [doc_entity.id for doc_entity in 
doc_entities_keyword] + doc_entities_vector = await DocumentManager.get_top_k_document_by_kb_id_vector(kb_id, vector, top_k-len(doc_entities_keyword), doc_ids, banned_id) + use_doc_ids += [doc_entity.id for doc_entity in doc_entities_vector] + chunk_entities_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, use_doc_ids, banned_id) + chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities_keyword] + chunk_entities_vector += await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_keyword), use_doc_ids, banned_id+chunk_ids) + chunk_entities = chunk_entities_keyword + chunk_entities_vector + except Exception as e: + err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/rag/enhanced_by_llm_searcher.py b/data_chain/rag/enhanced_by_llm_searcher.py new file mode 100644 index 0000000..2adea98 --- /dev/null +++ b/data_chain/rag/enhanced_by_llm_searcher.py @@ -0,0 +1,68 @@ +import uuid +import yaml +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.llm.llm import LLM +from data_chain.config.config import config + + +class EnhancedByLLMSearcher(BaseSearcher): + """ + 基于大模型的搜索 + """ + name = SearchMethod.ENHANCED_BY_LLM.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], + banned_id=list[uuid.UUID] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 
检索结果 + """ + vector = await Embedding.vectorize_embedding(query) + try: + with open('./data_chain/common/prompt.yaml', 'r', encoding='utf-8') as f: + prompt_dict = yaml.safe_load(f) + prompt_template = prompt_dict['CHUNK_QUERY_MATCH_PROMPT'] + chunk_entities = [] + rd = 0 + max_retry = 5 + llm = LLM( + openai_api_key=config['OPENAI_API_KEY'], + openai_api_base=config['OPENAI_API_BASE'], + model_name=config['MODEL_NAME'], + max_tokens=config['MAX_TOKENS'], + ) + while len(chunk_entities) < top_k and rd < max_retry: + rd += 1 + sub_chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_id) + for chunk_entity in sub_chunk_entities: + sys_call = prompt_template.format( + chunk=chunk_entity.text, + query=query, + ) + user_call = "请输出YES或NO" + result = await llm.nostream([], sys_call, user_call) + result = result.lower() + if result == "yes": + chunk_entities.append(chunk_entity) + chunk_ids = [chunk_entity.id for chunk_entity in sub_chunk_entities] + banned_id += chunk_ids + return chunk_entities[:top_k] + except Exception as e: + err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/rag/keyword_and_vector_searcher.py b/data_chain/rag/keyword_and_vector_searcher.py new file mode 100644 index 0000000..4c82ec7 --- /dev/null +++ b/data_chain/rag/keyword_and_vector_searcher.py @@ -0,0 +1,40 @@ +import uuid +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod + + +class KeywordVectorSearcher(BaseSearcher): + """ + 关键词向量检索 + """ + name = 
SearchMethod.KEYWORD_AND_VECTOR.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], + banned_id=list[uuid.UUID] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 检索结果 + """ + vector = await Embedding.vectorize_embedding(query) + try: + chunk_entities_get_by_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, doc_ids, banned_id) + chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities_get_by_keyword] + chunk_entities_get_by_vector = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_get_by_keyword), doc_ids, banned_id+chunk_ids) + chunk_entities = chunk_entities_get_by_keyword + chunk_entities_get_by_vector + except Exception as e: + err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/rag/keyword_searcher.py b/data_chain/rag/keyword_searcher.py new file mode 100644 index 0000000..40fa2dd --- /dev/null +++ b/data_chain/rag/keyword_searcher.py @@ -0,0 +1,33 @@ +import uuid +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod + + +class KeyWordSearcher(BaseSearcher): + name = SearchMethod.KEYWORD.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], + banned_id=list[uuid.UUID] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 检索结果 + """ + try: + chunk_entities = await 
ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_id) + except Exception as e: + err = f"[KeyWordSearcher] 关键词检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/rag/vector_searcher.py b/data_chain/rag/vector_searcher.py new file mode 100644 index 0000000..a6bd912 --- /dev/null +++ b/data_chain/rag/vector_searcher.py @@ -0,0 +1,34 @@ +import uuid +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod + + +class VectorSearcher(BaseSearcher): + name = SearchMethod.VECTOR.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], + banned_id=list[uuid.UUID] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 检索结果 + """ + vector = await Embedding.vectorize_embedding(query) + try: + chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_id) + except Exception as e: + err = f"[VectorSearcher] 向量检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/stores/mongodb/mongodb.py b/data_chain/stores/mongodb/mongodb.py index 0292264..3fdd18d 100644 --- a/data_chain/stores/mongodb/mongodb.py +++ b/data_chain/stores/mongodb/mongodb.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING import uuid from data_chain.config.config import config -from data_chain.logger.logger import logger as logging +from data_chain.logging.logging import logging as logging class Session(BaseModel): @@ -57,7 +57,7 @@ class MongoDB: try: return 
cls._client[config['MONGODB_DATABASE']][collection_name] except Exception as e: - logger.exception("[MongoDB] 获取集合 %s 失败", collection_name) + logging.exception("[MongoDB] 获取集合 %s 失败", collection_name) raise RuntimeError(str(e)) from e @classmethod @@ -66,7 +66,7 @@ class MongoDB: try: await cls._client[config['MONGODB_DATABASE']][collection_name].delete_many({}) except Exception: - logger.exception("[MongoDB] 清空集合 %s 失败", collection_name) + logging.exception("[MongoDB] 清空集合 %s 失败", collection_name) @classmethod def get_session(cls) -> AsyncClientSession: diff --git a/openapi.json b/openapi.json index 214ca4e..f1e5934 100644 --- a/openapi.json +++ b/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk":{"put":{"tags":["Chunk"],"summary":"Update Chunk By 
Id","operationId":"update_chunk_by_id_chunk_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse 
Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Datasets By Kb Id","operationId":"list_datasets_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset By Dataset Id","operationId":"list_data_in_dataset_by_dataset_id_dataset_data_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset 
Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset 
Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Test Case By Testing Id","operationId":"list_test_case_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing 
Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingid"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create 
Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User 
Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Task"],"summary":"Delete Task By Task Ids Or Task Type","operationId":"delete_task_by_task_ids_or_task_type_task_delete","parameters":[{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Taskid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/report":{"get":{"tags":["Task"],"summary":"Get Task Report By Task Id","operationId":"get_task_report_by_task_id_task_report_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetTaskReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset 
Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"mess
age":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"general"}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"}},"type":"object","required":["dataId","docName","question","answer","chunk"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"allOf":[{"$ref":"#/components/schemas/LLM"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","llm","generateTask","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/c
omponents/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 
响应"},"DeleteTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskResponse","description":"DELETE /task 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 
响应"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","docTask","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"doc_type_id":{"type":"string","format":"uuid","title":"Doc Type Id"},"doc_type_name":{"type":"string","maxLength":20,"minLength":1,"title":"Doc Type 
Name","default":"这是一个默认的文档类型名称"}},"type":"object","required":["doc_type_id"],"title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 
响应"},"GetTaskReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"任务报告","default":""}},"type":"object","title":"GetTaskReportResponse","description":"GET /task/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 
响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表",
"default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","title":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 
响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"type":{"anyOf":[{"$ref":"#/components/schemas/ChunkType"},{"type":"null"}],"description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseMsg":{"properties":{"testingTestcases":{"items":{"$ref":"#/components/schemas/TestingTestCase"},"type":"array","title":"Testingtestcases","description":"测试用例列表","default":[]}},"type":"object","title":"ListTestCaseMsg","description":"GET /testing/testcase 数据结构"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestCaseMsg"}],"description":"测试用例列表数据结构","default":{"testingTestcases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"文档ID"}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 
响应"},"SearchMethod":{"type":"string","enum":["general","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","tetsing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"
teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"allOf":[{"$ref":"#/components/schemas/LLM"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"测试评分"},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","llm","searchMethod","testingTask","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分"},"avePre":{"type":"number","title":"Avepre","description":"平均精确率"},"aveRec":{"type":"number","title":"Averec","description":"平均召回率"},"aveFai":
{"type":"number","title":"Avefai","description":"平均忠实值"},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性"},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分"},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分"},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数"},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","required":["aveScore","avePre","aveRec","aveFai","aveRel","aveLcs","aveLeve","aveJac"],"title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","minLength":1,"title":"Text","description":"分块文本内容"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","required":["text"],"title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"general"}},"type":"object","required":["datasetId","llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"文档ID"}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk":{"put":{"tags":["Chunk"],"summary":"Update Chunk By 
Id","operationId":"update_chunk_by_id_chunk_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task 
Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing 
Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role 
Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg 
Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"t
ype":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":
"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_embedding"}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"}},"type":"object","required":["dataId","docName","question","answer","chunk"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/compon
ents/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","docTask","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"doc_type_id":{"type":"string","
format":"uuid","title":"Doc Type Id"},"doc_type_name":{"type":"string","maxLength":20,"minLength":1,"title":"Doc Type Name","default":"这是一个默认的文档类型名称"}},"type":"object","required":["doc_type_id"],"title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 
响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 
响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表",
"default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","title":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 
响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"type":{"anyOf":[{"$ref":"#/components/schemas/ChunkType"},{"type":"null"}],"description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 
响应"},"SearchMethod":{"type":"string","enum":["keyword","embedding","keyword_embedding","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":
"Teamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 
数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","minLength":1,"title":"Text","description":"分块文本内容"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","required":["text"],"title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_embedding"}},"type":"object","required":["datasetId","llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file -- Gitee From d48039508970a2fb9a055cc1d4b6a833f6e119d9 Mon Sep 17 00:00:00 2001 From: zxstty Date: Tue, 13 May 2025 15:51:37 +0800 Subject: [PATCH 03/15] =?UTF-8?q?=E5=AE=8C=E5=96=84=E6=A3=80=E7=B4=A2?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E7=9A=84router?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../base/task/worker/parse_document_worker.py | 15 ++-- data_chain/apps/router/chunk.py | 12 ++++ data_chain/apps/service/chunk_service.py | 69 ++++++++++++++++++- data_chain/entities/request_data.py | 16 ++++- data_chain/entities/response_data.py | 17 +++++ data_chain/manager/chunk_manager.py | 26 ++++--- data_chain/manager/document_manager.py | 8 +-- data_chain/rag/base_searcher.py | 14 ++-- data_chain/rag/doc2chunk_bfs_searcher.py | 57 +++++++++++++++ data_chain/rag/doc2chunk_searcher.py | 12 ++-- data_chain/rag/enhanced_by_llm_searcher.py | 8 +-- data_chain/rag/keyword_and_vector_searcher.py | 8 +-- data_chain/rag/keyword_searcher.py | 6 +- data_chain/rag/vector_searcher.py | 6 +- data_chain/stores/database/database.py | 1 + 15 files changed, 229 
insertions(+), 46 deletions(-) diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index e789e96..a5fd830 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -305,7 +305,7 @@ class ParseDocumentWorker(BaseWorker): if len(parent_node.title) > 0: content += parent_node.title + '\n' else: - sentences = TokenTool.get_leave_setences_from_content_len(parent_node.content, 1) + sentences = TokenTool.get_top_k_keysentence(parent_node.content, 1) if sentences: content += sentences[0] + '\n' index = 0 @@ -314,13 +314,13 @@ class ParseDocumentWorker(BaseWorker): if node.title: content += node.title + '\n' else: - sentences = TokenTool.get_leave_setences_from_content_len(node.content, 1) + sentences = TokenTool.get_top_k_keysentence(node.content, 1) if sentences: content += sentences[0] + '\n' index += 1 title = await TokenTool.get_title_by_llm(content, llm) if not title: - sentences = TokenTool.get_leave_setences_from_content_len(content, 1) + sentences = TokenTool.get_top_k_keysentence(content, 1) if sentences: title = sentences[0] node.text_feature = title @@ -330,17 +330,17 @@ class ParseDocumentWorker(BaseWorker): if len(parent_node.title) > 0: content += parent_node.title + '\n' else: - sentences = TokenTool.get_leave_setences_from_content_len(parent_node.content, 1) + sentences = TokenTool.get_top_k_keysentence(parent_node.content, 1) if sentences: content += sentences[0] + '\n' for node in node.link_nodes: if node.title: content += node.title + '\n' else: - sentences = TokenTool.get_leave_setences_from_content_len(node.content, 1) + sentences = TokenTool.get_top_k_keysentence(node.content, 1) if sentences: content += sentences[0] + '\n' - sentences = TokenTool.get_leave_setences_from_content_len(content, 1) + sentences = TokenTool.get_top_k_keysentence(content, 1) if sentences: node.text_feature = 
sentences[0] else: @@ -361,7 +361,7 @@ class ParseDocumentWorker(BaseWorker): if llm is not None: abstract = await TokenTool.get_abstract_by_llm(abstract, llm) else: - sentences = TokenTool.get_leave_setences_from_content_len(abstract, 1) + sentences = TokenTool.get_top_k_keysentence(abstract, 1) if sentences: abstract = sentences[0] else: @@ -396,6 +396,7 @@ class ParseDocumentWorker(BaseWorker): team_id=doc_entity.team_id, kb_id=doc_entity.kb_id, doc_id=doc_entity.id, + doc_name=doc_entity.name, text=node.content, text_vector=node.vector, tokens=TokenTool.get_tokens(node.content), diff --git a/data_chain/apps/router/chunk.py b/data_chain/apps/router/chunk.py index e378f7b..f7544f4 100644 --- a/data_chain/apps/router/chunk.py +++ b/data_chain/apps/router/chunk.py @@ -5,12 +5,14 @@ from typing import Annotated from uuid import UUID from data_chain.entities.request_data import ( ListChunkRequest, + SearchChunkRequest, UpdateChunkRequest ) from data_chain.entities.response_data import ( ListChunkMsg, ListChunkResponse, + SearchChunkResponse, UpdateChunkResponse ) from data_chain.apps.service.router_service import get_route_info @@ -31,6 +33,16 @@ async def list_chunks_by_document_id( return ListChunkResponse(result=list_chunk_msg) +@router.post('/search', response_model=SearchChunkResponse, dependencies=[Depends(verify_user)]) +async def search_chunks( + user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + req: Annotated[SearchChunkRequest, Body()], +): + search_chunk_msg = await ChunkService.search_chunks(req) + return SearchChunkResponse(result=search_chunk_msg) + + @router.put('', response_model=UpdateChunkResponse, dependencies=[Depends(verify_user)]) async def update_chunk_by_id(user_sub: Annotated[str, Depends(get_user_sub)], action: Annotated[str, Depends(get_route_info)], diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 0b11d46..821dcf0 100644 --- 
a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -6,12 +6,16 @@ import traceback import os from data_chain.entities.request_data import ( ListChunkRequest, + SearchChunkRequest, UpdateChunkRequest, ) from data_chain.entities.response_data import ( Task, Document, - ListChunkMsg + Chunk, + DocChunk, + ListChunkMsg, + SearchChunkMsg ) from data_chain.apps.base.convertor import Convertor from data_chain.apps.service.task_queue_service import TaskQueueService @@ -27,6 +31,8 @@ from data_chain.stores.minio.minio import MinIO from data_chain.entities.enum import ParseMethod, DataSetStatus, DocumentStatus, TaskType from data_chain.entities.common import DOC_PATH_IN_OS, DOC_PATH_IN_MINIO, DEFAULt_DOC_TYPE_ID from data_chain.logger.logger import logger as logging +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.parser.tools.token_tool import TokenTool class ChunkService: @@ -63,6 +69,65 @@ class ChunkService: logging.exception("[ChunkService] %s", err) raise e + async def search_chunks(req: SearchChunkRequest) -> SearchChunkMsg: + """根据查询条件搜索分片""" + chunk_entities = [] + for kb_id in req.kb_ids: + try: + chunk_entities += await BaseSearcher.search(req.search_method.value, kb_id, req.query, 2*req.top_k, req.doc_ids, req.banned_ids) + except Exception as e: + err = f"[ChunkService] 搜索分片失败,error: {e}" + logging.exception(err) + return SearchChunkMsg(total=0, chunks=[]) + if len(chunk_entities) == 0: + return SearchChunkMsg(docChunks=[]) + if req.is_rerank: + chunk_entities = await BaseSearcher.rerank(chunk_entities, req.query) + chunk_entities = chunk_entities[:req.top_k] + + chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities] + if req.is_related_surrounding: + # 关联上下文 + tokens_limit = req.tokens_limit + tokens_limit_every_chunk = tokens_limit // len(chunk_entities) + leave_tokens = 0 + for chunk_entity in chunk_entities: + leave_tokens = tokens_limit_every_chunk+leave_tokens + try: + 
related_chunk_entities = await BaseSearcher.related_surround_chunk(chunk_entity, tokens_limit-chunk_entity.tokens, chunk_ids) + except Exception as e: + leave_tokens += tokens_limit_every_chunk + err = f"[ChunkService] 关联上下文失败,error: {e}" + logging.exception(err) + continue + tokens_sum = 0 + for related_chunk_entity in related_chunk_entities: + tokens_sum += related_chunk_entity.tokens + leave_tokens -= tokens_sum + if leave_tokens < 0: + leave_tokens = 0 + chunk_ids += [chunk_entity.id for chunk_entity in related_chunk_entities] + chunk_entities += related_chunk_entities + search_chunk_msg = SearchChunkMsg(docChunks=[]) + if req.is_classify_by_doc: + doc_chunks = await BaseSearcher.classify_by_doc_id(chunk_entities) + for doc_chunk in doc_chunks: + dc = DocChunk(doc_id=doc_chunk.doc_id, doc_name=doc_chunk.doc_name, chunks=[]) + for chunk_entity in doc_chunk.chunks: + chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + if req.is_compress: + chunk.text = TokenTool.compress_tokens(chunk.text) + dc.chunks.append(chunk) + search_chunk_msg.docChunks.append(dc) + else: + for chunk_entity in chunk_entities: + chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + if req.is_compress: + chunk.text = TokenTool.compress_tokens(chunk.text) + dc = DocChunk(doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[chunk]) + search_chunk_msg.docChunks.append(dc) + return search_chunk_msg + async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> bool: try: chunk_dict = await Convertor.convert_update_chunk_request_to_dict(req) @@ -71,4 +136,4 @@ class ChunkService: except Exception as e: err = "更新分片失败" logging.exception("[ChunkService] %s", err) - raise e + raise err diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index b5d8e4c..6feceeb 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -135,6 +135,21 @@ class 
ListChunkRequest(BaseModel): page_size: int = Field(default=40, description="每页数量", alias="pageSize") +class SearchChunkRequest(BaseModel): + kb_ids: List[uuid.UUID] = Field(default=[], description="资产id", alias="kbIds") + query: str = Field(default='', description="查询内容") + top_k: int = Field(default=5, description="返回的结果数量", alias="topK") + doc_ids: Optional[List[uuid.UUID]] = Field(default=None, description="文档id", alias="docIds") + banned_ids: Optional[List[uuid.UUID]] = Field(default=[], description="禁止的分块id", alias="bannedIds") + search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_AND_VECTOR, + description="检索方法", alias="searchMethod") + is_related_surrounding: bool = Field(default=True, description="是否关联上下文", alias="isRelatedSurrounding") + is_classify_by_doc: bool = Field(default=False, description="是否按文档分类", alias="isClassifyByDoc") + is_rerank: bool = Field(default=False, description="是否重新排序", alias="isRerank") + is_compress: bool = Field(default=False, description="是否压缩", alias="isCompress") + tokens_limit: int = Field(default=8192, description="token限制", alias="tokensLimit") + + class ListDatasetRequest(BaseModel): kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId") dataset_id: Optional[uuid.UUID] = Field(default=None, description="数据集id", alias="datasetId") @@ -207,7 +222,6 @@ class UpdateTestingRequest(BaseModel): testing_name: str = Field(default='这是一个默认的测试名称', description="测试名称", min_length=1, max_length=150, alias="testingName") description: str = Field(default='', description="测试简介", max_length=200) - dataset_id: uuid.UUID = Field(description="测试数据集id", alias="datasetId") llm_id: str = Field(description="测试使用的大模型id", alias="llmId") search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_EMBEDDING, description="测试使用的检索方法", alias="searchMethod") diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index ddfe55b..953a257 100644 --- 
a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -296,6 +296,23 @@ class UpdateChunkResponse(ResponseData): result: Optional[uuid.UUID] = Field(default=None, description="分片ID") +class DocChunk(BaseModel): + """Post /chunk/search 数据结构""" + doc_id: uuid.UUID = Field(description="文档ID", alias="docId") + doc_name: str = Field(description="文档名称", alias="docName") + chunks: list[Chunk] = Field(default=[], description="分片列表", alias="chunks") + + +class SearchChunkMsg(BaseModel): + """Post /chunk/search 数据结构""" + doc_chunks: list[DocChunk] = Field(default=[], description="文档分片列表", alias="docChunks") + + +class SearchChunkResponse(ResponseData): + """POST /chunk/search 响应""" + result: SearchChunkMsg = Field(default=SearchChunkMsg(), description="文档分片列表数据结构") + + class LLM(BaseModel): llm_id: str = Field(description="大模型ID", alias="llmId") llm_name: str = Field(description="大模型名称", min=1, max=20, alias="llmName") diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index 10270fe..2854d03 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -134,7 +134,8 @@ class ChunkManager(): @staticmethod async def get_top_k_chunk_by_kb_id_vector( kb_id: uuid.UUID, vector: List[float], - top_k: int, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + top_k: int, doc_ids: list[uuid.UUID] = None, banned_ids: list[uuid.UUID] = [], + chunk_to_type: str = None, pre_ids: list[uuid.UUID] = None) -> List[ChunkEntity]: """根据知识库ID和向量查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -147,12 +148,16 @@ class ChunkManager(): .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(ChunkEntity.kb_id == kb_id) .where(ChunkEntity.status != ChunkStatus.DELETED.value) - .where(ChunkEntity.id.notin_(banned_id)) + .where(ChunkEntity.id.notin_(banned_ids)) .order_by(ChunkEntity.text_vector.cosine_distance(vector).desc()) 
.limit(top_k) ) if doc_ids: stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) + if chunk_to_type: + stmt = stmt.where(ChunkEntity.parse_topology_type == chunk_to_type) + if pre_ids: + stmt = stmt.where(ChunkEntity.pre_id_in_parse_topology.in_(pre_ids)) result = await session.execute(stmt) chunk_entities = result.scalars().all() return chunk_entities @@ -163,8 +168,8 @@ class ChunkManager(): @staticmethod async def get_top_k_chunk_by_kb_id_keyword( - kb_id: uuid.UUID, query: str, - top_k: int, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[ChunkEntity]: + kb_id: uuid.UUID, query: str, top_k: int, doc_ids: list[uuid.UUID] = None, banned_ids: list[uuid.UUID] = [], + chunk_to_type: str = None, pre_ids: list[uuid.UUID] = None) -> List[ChunkEntity]: """根据知识库ID和关键词查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -186,7 +191,7 @@ class ChunkManager(): .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(ChunkEntity.kb_id == kb_id) .where(ChunkEntity.status != ChunkStatus.DELETED.value) - .where(ChunkEntity.id.notin_(banned_id)) + .where(ChunkEntity.id.notin_(banned_ids)) .order_by( func.ts_rank_cd( func.to_tsvector(tokenizer, ChunkEntity.text), @@ -197,6 +202,10 @@ class ChunkManager(): ) if doc_ids: stmt = stmt.where(DocumentEntity.id.in_(doc_ids)) + if chunk_to_type: + stmt = stmt.where(ChunkEntity.parse_topology_type == chunk_to_type) + if pre_ids: + stmt = stmt.where(ChunkEntity.pre_id_in_parse_topology.in_(pre_ids)) result = await session.execute(stmt) chunk_entities = result.scalars().all() return chunk_entities @@ -208,7 +217,7 @@ class ChunkManager(): @staticmethod async def fetch_surrounding_chunk_by_doc_id_and_global_offset( doc_id: uuid.UUID, global_offset: int, - top_k: int = 50) -> List[ChunkEntity]: + top_k: int = 50, banned_ids: list[uuid.UUID] = []) -> List[ChunkEntity]: """根据文档ID和全局偏移量查询文档解析结果""" try: async with await DataBase.get_session() as session: @@ -216,8 +225,9 @@ class 
ChunkManager(): select(ChunkEntity) .where(and_(ChunkEntity.doc_id == doc_id, ChunkEntity.status != ChunkStatus.DELETED.value)) - .where(or_(ChunkEntity.global_offset >= global_offset - top_k, - ChunkEntity.global_offset <= global_offset + top_k)) + .where(and_(ChunkEntity.global_offset >= global_offset - top_k, + ChunkEntity.global_offset <= global_offset + top_k)) + .where(ChunkEntity.id.notin_(banned_ids)) .order_by(ChunkEntity.global_offset) ) result = await session.execute(stmt) diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index 39aece1..4db614c 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -37,14 +37,14 @@ class DocumentManager(): @staticmethod async def get_top_k_document_by_kb_id_vector( kb_id: uuid.UUID, vector: list[float], - top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[DocumentEntity]: + top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_ids: list[uuid.UUID] = []) -> List[DocumentEntity]: """根据知识库ID和向量获取前K个文档""" try: async with await DataBase.get_session() as session: stmt = ( select(DocumentEntity) .where(DocumentEntity.kb_id == kb_id) - .where(DocumentEntity.id.notin_(banned_id)) + .where(DocumentEntity.id.notin_(banned_ids)) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(DocumentEntity.enabled == True) .where(DocumentEntity.abstract_vector.cosine_distance(vector).desc()) @@ -63,7 +63,7 @@ class DocumentManager(): @staticmethod async def get_top_k_document_by_kb_id_keyword( - kb_id: uuid.UUID, query: str, top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_id: list[uuid.UUID] = []) -> List[DocumentEntity]: + kb_id: uuid.UUID, query: str, top_k: int = 5, doc_ids: list[uuid.UUID] = None, banned_ids: list[uuid.UUID] = []) -> List[DocumentEntity]: """根据知识库ID和关键词获取前K个文档""" try: async with await DataBase.get_session() as session: @@ -79,7 +79,7 @@ class DocumentManager(): stmt = ( 
select(DocumentEntity) .where(DocumentEntity.kb_id == kb_id) - .where(DocumentEntity.id.notin_(banned_id)) + .where(DocumentEntity.id.notin_(banned_ids)) .where(DocumentEntity.status != DocumentStatus.DELETED.value) .where(DocumentEntity.enabled == True) .where(func.ts_rank_cd( diff --git a/data_chain/rag/base_searcher.py b/data_chain/rag/base_searcher.py index 5d82eda..c65f6c1 100644 --- a/data_chain/rag/base_searcher.py +++ b/data_chain/rag/base_searcher.py @@ -29,7 +29,7 @@ class BaseSearcher: @staticmethod async def search( search_method: str, kb_id: uuid.UUID, query: str, top_k: int = 5, doc_ids: list[uuid.UUID] = None, - banned_id: list[uuid.UUID] = []) -> list[ChunkEntity]: + banned_ids: list[uuid.UUID] = []) -> list[ChunkEntity]: """ 检索器 :param search_method: 检索器方法 @@ -40,7 +40,7 @@ class BaseSearcher: search_class = BaseSearcher.find_worker_class(search_method) if search_class: return await search_class.search( - query=query, kb_id=kb_id, top_k=top_k, doc_ids=doc_ids, banned_id=banned_id + query=query, kb_id=kb_id, top_k=top_k, doc_ids=doc_ids, banned_ids=banned_ids ) else: err = f"[BaseSearch] 检索器不存在,search_method: {search_method}" @@ -64,14 +64,15 @@ class BaseSearcher: return sorted_chunk_entities @staticmethod - async def related_surround_chunk(chunk_entity: ChunkEntity, tokens_limit: int = 1024) -> list[ChunkEntity]: + async def related_surround_chunk( + chunk_entity: ChunkEntity, tokens_limit: int = 1024, banned_ids: list[uuid.UUID] = []) -> list[ChunkEntity]: """ 相关上下文 :param list: 检索结果 :param query: 查询 :return: 相关上下文 """ - chunk_entities = await ChunkManager.fetch_surrounding_chunk_by_doc_id_and_global_offset(chunk_entity.doc_id, chunk_entity.global_offset) + chunk_entities = await ChunkManager.fetch_surrounding_chunk_by_doc_id_and_global_offset(chunk_entity.doc_id, chunk_entity.global_offset, banned_ids) chunk_entity_dict = {} min_offset = chunk_entity.global_offset max_offset = chunk_entity.global_offset @@ -85,8 +86,11 @@ class BaseSearcher: 
upper = chunk_entity.global_offset+1 related_chunk_entities = [] tokens_sub = 0 + tokens_sum = 0 find_lower = True while 1: + if tokens_sum >= tokens_limit: + break if lower < min_offset and upper > max_offset: break if tokens_sub < 0: @@ -115,11 +119,13 @@ class BaseSearcher: if chunk_entity_dict.get(lower) is not None: tokens_sub += chunk_entity_dict[lower].tokens related_chunk_entities.append(chunk_entity_dict[lower]) + tokens_sum += chunk_entity_dict[lower].tokens lower -= 1 else: if chunk_entity_dict.get(upper) is not None: tokens_sub += chunk_entity_dict[upper].tokens related_chunk_entities.append(chunk_entity_dict[upper]) + tokens_sum += chunk_entity_dict[upper].tokens upper += 1 return related_chunk_entities diff --git a/data_chain/rag/doc2chunk_bfs_searcher.py b/data_chain/rag/doc2chunk_bfs_searcher.py index e69de29..ac6e8ca 100644 --- a/data_chain/rag/doc2chunk_bfs_searcher.py +++ b/data_chain/rag/doc2chunk_bfs_searcher.py @@ -0,0 +1,57 @@ +import uuid +from pydantic import BaseModel, Field +import random +from data_chain.logger.logger import logger as logging +from data_chain.stores.database.database import ChunkEntity +from data_chain.parser.tools.token_tool import TokenTool +from data_chain.manager.document_manager import DocumentManager +from data_chain.manager.chunk_manager import ChunkManager +from data_chain.rag.base_searcher import BaseSearcher +from data_chain.embedding.embedding import Embedding +from data_chain.entities.enum import SearchMethod +from data_chain.entities.enum import ChunkParseTopology + + +class Doc2Chunk(BaseModel): + """ + 关键词向量检索 + """ + name = SearchMethod.DOC2CHUNK.value + + @staticmethod + async def search( + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_ids: list[uuid.UUID] = [] + ) -> list[ChunkEntity]: + """ + 向量检索 + :param query: 查询 + :param top_k: 返回的结果数量 + :return: 检索结果 + """ + vector = await Embedding.vectorize_embedding(query) + try: + root_chunk_entities_keyword = await 
ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, doc_ids, banned_ids, ChunkParseTopology.TREEROOT.value) + banned_ids += [chunk_entity.id for chunk_entity in root_chunk_entities_keyword] + root_chunk_entities_vector = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(root_chunk_entities_keyword), doc_ids, banned_ids, ChunkParseTopology.TREEROOT.value) + banned_ids += [chunk_entity.id for chunk_entity in root_chunk_entities_vector] + chunk_entities = root_chunk_entities_keyword + root_chunk_entities_vector + pre_ids = [chunk_entity.id for chunk_entity in chunk_entities] + rd = 0 + max_retry = 5 + while rd < max_retry: + root_chunk_entities_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, doc_ids, banned_ids, None, pre_ids) + banned_ids += [chunk_entity.id for chunk_entity in root_chunk_entities_keyword] + root_chunk_entities_vector = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(root_chunk_entities_keyword), doc_ids, banned_ids, None, pre_ids) + banned_ids += [chunk_entity.id for chunk_entity in root_chunk_entities_vector] + sub_chunk_entities = root_chunk_entities_keyword + root_chunk_entities_vector + if len(sub_chunk_entities) == 0: + break + chunk_entities += sub_chunk_entities + pre_ids += [chunk_entity.id for chunk_entity in sub_chunk_entities] + rd += 1 + except Exception as e: + err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" + logging.exception(err) + return [] + return chunk_entities diff --git a/data_chain/rag/doc2chunk_searcher.py b/data_chain/rag/doc2chunk_searcher.py index d42899c..470ce73 100644 --- a/data_chain/rag/doc2chunk_searcher.py +++ b/data_chain/rag/doc2chunk_searcher.py @@ -19,8 +19,8 @@ class Doc2Chunk(BaseModel): @staticmethod async def search( - query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], - banned_id=list[uuid.UUID] + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = 
None, + banned_ids: list[uuid.UUID] = [] ) -> list[ChunkEntity]: """ 向量检索 @@ -30,13 +30,13 @@ class Doc2Chunk(BaseModel): """ vector = await Embedding.vectorize_embedding(query) try: - doc_entities_keyword = await DocumentManager.get_top_k_document_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_id) + doc_entities_keyword = await DocumentManager.get_top_k_document_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_ids) use_doc_ids = [doc_entity.id for doc_entity in doc_entities_keyword] - doc_entities_vector = await DocumentManager.get_top_k_document_by_kb_id_vector(kb_id, vector, top_k-len(doc_entities_keyword), doc_ids, banned_id) + doc_entities_vector = await DocumentManager.get_top_k_document_by_kb_id_vector(kb_id, vector, top_k-len(doc_entities_keyword), doc_ids, banned_ids) use_doc_ids += [doc_entity.id for doc_entity in doc_entities_vector] - chunk_entities_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, use_doc_ids, banned_id) + chunk_entities_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, use_doc_ids, banned_ids) chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities_keyword] - chunk_entities_vector += await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_keyword), use_doc_ids, banned_id+chunk_ids) + chunk_entities_vector += await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_keyword), use_doc_ids, banned_ids+chunk_ids) chunk_entities = chunk_entities_keyword + chunk_entities_vector except Exception as e: err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" diff --git a/data_chain/rag/enhanced_by_llm_searcher.py b/data_chain/rag/enhanced_by_llm_searcher.py index 2adea98..066928d 100644 --- a/data_chain/rag/enhanced_by_llm_searcher.py +++ b/data_chain/rag/enhanced_by_llm_searcher.py @@ -22,8 +22,8 @@ class EnhancedByLLMSearcher(BaseSearcher): @staticmethod async def search( - query: str, 
kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], - banned_id=list[uuid.UUID] + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_ids: list[uuid.UUID] = [] ) -> list[ChunkEntity]: """ 向量检索 @@ -47,7 +47,7 @@ class EnhancedByLLMSearcher(BaseSearcher): ) while len(chunk_entities) < top_k and rd < max_retry: rd += 1 - sub_chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_id) + sub_chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_ids) for chunk_entity in sub_chunk_entities: sys_call = prompt_template.format( chunk=chunk_entity.text, @@ -59,7 +59,7 @@ class EnhancedByLLMSearcher(BaseSearcher): if result == "yes": chunk_entities.append(chunk_entity) chunk_ids = [chunk_entity.id for chunk_entity in sub_chunk_entities] - banned_id += chunk_ids + banned_ids += chunk_ids return chunk_entities[:top_k] except Exception as e: err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" diff --git a/data_chain/rag/keyword_and_vector_searcher.py b/data_chain/rag/keyword_and_vector_searcher.py index 4c82ec7..18cfdc6 100644 --- a/data_chain/rag/keyword_and_vector_searcher.py +++ b/data_chain/rag/keyword_and_vector_searcher.py @@ -18,8 +18,8 @@ class KeywordVectorSearcher(BaseSearcher): @staticmethod async def search( - query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], - banned_id=list[uuid.UUID] + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_ids: list[uuid.UUID] = [] ) -> list[ChunkEntity]: """ 向量检索 @@ -29,9 +29,9 @@ class KeywordVectorSearcher(BaseSearcher): """ vector = await Embedding.vectorize_embedding(query) try: - chunk_entities_get_by_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, doc_ids, banned_id) + chunk_entities_get_by_keyword = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k//2, doc_ids, 
banned_ids) chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities_get_by_keyword] - chunk_entities_get_by_vector = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_get_by_keyword), doc_ids, banned_id+chunk_ids) + chunk_entities_get_by_vector = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k-len(chunk_entities_get_by_keyword), doc_ids, banned_ids+chunk_ids) chunk_entities = chunk_entities_get_by_keyword + chunk_entities_get_by_vector except Exception as e: err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" diff --git a/data_chain/rag/keyword_searcher.py b/data_chain/rag/keyword_searcher.py index 40fa2dd..a87cedc 100644 --- a/data_chain/rag/keyword_searcher.py +++ b/data_chain/rag/keyword_searcher.py @@ -15,8 +15,8 @@ class KeyWordSearcher(BaseSearcher): @staticmethod async def search( - query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], - banned_id=list[uuid.UUID] + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_ids: list[uuid.UUID] = [] ) -> list[ChunkEntity]: """ 向量检索 @@ -25,7 +25,7 @@ class KeyWordSearcher(BaseSearcher): :return: 检索结果 """ try: - chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_id) + chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_keyword(kb_id, query, top_k, doc_ids, banned_ids) except Exception as e: err = f"[KeyWordSearcher] 关键词检索失败,error: {e}" logging.exception(err) diff --git a/data_chain/rag/vector_searcher.py b/data_chain/rag/vector_searcher.py index a6bd912..b55ab43 100644 --- a/data_chain/rag/vector_searcher.py +++ b/data_chain/rag/vector_searcher.py @@ -15,8 +15,8 @@ class VectorSearcher(BaseSearcher): @staticmethod async def search( - query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids=list[uuid.UUID], - banned_id=list[uuid.UUID] + query: str, kb_id: uuid.UUID, top_k: int = 5, doc_ids: list[uuid.UUID] = None, + banned_ids: 
list[uuid.UUID] = [] ) -> list[ChunkEntity]: """ 向量检索 @@ -26,7 +26,7 @@ class VectorSearcher(BaseSearcher): """ vector = await Embedding.vectorize_embedding(query) try: - chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_id) + chunk_entities = await ChunkManager.get_top_k_chunk_by_kb_id_vector(kb_id, vector, top_k, doc_ids, banned_ids) except Exception as e: err = f"[VectorSearcher] 向量检索失败,error: {e}" logging.exception(err) diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index 9aec7f8..3f81485 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -297,6 +297,7 @@ class ChunkEntity(Base): team_id = Column(UUID) # 团队id kb_id = Column(UUID) # 知识库id doc_id = Column(UUID, ForeignKey('document.id', ondelete="CASCADE")) # 片段所属文档id + doc_name = Column(String) # 片段所属文档名称 text = Column(String) # 片段文本内容 text_vector = Column(Vector(1024)) # 文本向量 tokens = Column(Integer) # 片段文本token数 -- Gitee From 4667e4bdc8d404fd32f6d55b80c6e508cf2db4d9 Mon Sep 17 00:00:00 2001 From: zxstty Date: Tue, 13 May 2025 17:59:15 +0800 Subject: [PATCH 04/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E4=BA=A7?= =?UTF-8?q?=E5=BA=93=E6=9B=B4=E6=96=B0=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/app.py | 29 +++++++- data_chain/apps/base/convertor.py | 12 ++-- .../base/task/worker/acc_testing_worker.py | 2 +- .../apps/base/task/worker/base_worker.py | 7 +- .../base/task/worker/parse_document_worker.py | 2 +- data_chain/apps/router/document.py | 4 +- data_chain/apps/router/knowledge_base.py | 6 +- data_chain/apps/service/chunk_service.py | 6 +- data_chain/apps/service/document_service.py | 2 +- .../apps/service/knwoledge_base_service.py | 40 +++++++---- data_chain/entities/common.py | 18 ++++- data_chain/entities/request_data.py | 10 +-- data_chain/manager/knowledge_manager.py | 2 +- 
data_chain/manager/team_manager.py | 19 +++++ data_chain/parser/tools/token_tool.py | 2 +- data_chain/rag/base_searcher.py | 17 ++--- data_chain/stores/database/database.py | 3 +- data_chain/stores/mongodb/mongodb.py | 2 +- openapi.json | 2 +- witchaind_doc/C3-Art0002.txt | 71 +++++++++++++++++++ 20 files changed, 196 insertions(+), 60 deletions(-) create mode 100644 witchaind_doc/C3-Art0002.txt diff --git a/data_chain/apps/app.py b/data_chain/apps/app.py index 2df1ccc..84e7b3d 100644 --- a/data_chain/apps/app.py +++ b/data_chain/apps/app.py @@ -4,6 +4,7 @@ from fastapi import APIRouter, Depends, Query, Body from apscheduler.schedulers.asyncio import AsyncIOScheduler import uvicorn import fastapi +import os from data_chain.entities.common import actions, DEFAULt_DOC_TYPE_ID from data_chain.apps.router import ( team, @@ -21,7 +22,14 @@ from data_chain.apps.router import ( from data_chain.stores.database.database import DataBase, ActionEntity, DocumentTypeEntity from data_chain.manager.role_manager import RoleManager from data_chain.manager.document_type_manager import DocumentTypeManager - +from data_chain.entities.common import ( + DOC_PATH_IN_OS, + EXPORT_KB_PATH_IN_OS, + IMPORT_KB_PATH_IN_OS, + EXPORT_DATASET_PATH_IN_OS, + IMPORT_DATASET_PATH_IN_OS, + TESTING_REPORT_PATH_IN_OS, +) # 关闭APScheduler的运行日志 # logging.getLogger('apscheduler').setLevel(logging.ERROR) from data_chain.apps.service.router_service import get_route_info @@ -36,6 +44,8 @@ async def startup_event(): await DataBase.init_all_table() await add_acitons() await TaskQueueService.init_task_queue() + await add_document_type() + await init_path() scheduler.add_job(TaskQueueService.handle_tasks, 'interval', seconds=5) @@ -50,13 +60,28 @@ async def add_acitons(): async def add_document_type(): - DocumentTypeEntity( + document_type_entity = DocumentTypeEntity( id=DEFAULt_DOC_TYPE_ID, name="default", ) await DocumentTypeManager.add_document_type(document_type_entity) +async def init_path(): + """初始化路径""" + 
paths = [ + DOC_PATH_IN_OS, + EXPORT_KB_PATH_IN_OS, + IMPORT_KB_PATH_IN_OS, + EXPORT_DATASET_PATH_IN_OS, + IMPORT_DATASET_PATH_IN_OS, + TESTING_REPORT_PATH_IN_OS + ] + for path in paths: + if not os.path.exists(path): + os.makedirs(path) + + async def configure(): app.include_router(team.router) app.include_router(knowledge_base.router) diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 5065766..02e8f5d 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -247,13 +247,9 @@ class Convertor: @staticmethod async def convert_knowledge_base_entity_to_knowledge_base( - knowledge_base_entity: KnowledgeBaseEntity, doc_type_entities: list[DocumentTypeEntity]) -> Knowledgebase: + knowledge_base_entity: KnowledgeBaseEntity) -> Knowledgebase: """将知识库实体转换为知识库""" try: - doc_types = [] - for doc_type_entity in doc_type_entities: - doc_type = await Convertor.convert_document_type_entity_to_document_type_response(doc_type_entity) - doc_types.append(doc_type) knowledge_base = Knowledgebase( kbId=knowledge_base_entity.id, kbName=knowledge_base_entity.name, @@ -268,7 +264,7 @@ class Convertor: defaultParseMethod=knowledge_base_entity.default_parse_method, defaultChunkSize=knowledge_base_entity.default_chunk_size, createdTime=knowledge_base_entity.created_time.strftime('%Y-%m-%d %H:%M'), - docTypes=doc_types, + docTypes=[], ) return knowledge_base except Exception as e: @@ -603,6 +599,7 @@ class Convertor: description=req.description, llm_id=req.llm_id, search_method=req.search_method.value, + top_k=req.top_k, ) return testing_entity except Exception as e: @@ -618,7 +615,8 @@ class Convertor: 'name': req.testing_name, 'description': req.description, 'llm_id': req.llm_id, - 'search_method': req.search_method.value + 'search_method': req.search_method.value, + 'top_k': req.top_k, } return req_dict except Exception as e: diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py 
b/data_chain/apps/base/task/worker/acc_testing_worker.py index 9281791..f5312c4 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -149,7 +149,7 @@ class TestingWorker(BaseWorker): question = qa_entity.question answer = qa_entity.answer chunk = qa_entity.chunk - chunk_entities = await BaseSearcher.search(testing_entity.search_method, question) + chunk_entities = await BaseSearcher.search(testing_entity.search_method, testing_entity.kb_id, question, top_k=testing_entity.top_k, doc_ids=None, banned_ids=[]) doc_chunk_dict = {} for chunk_entity in chunk_entities: if chunk_entity.doc_id not in doc_chunk_dict: diff --git a/data_chain/apps/base/task/worker/base_worker.py b/data_chain/apps/base/task/worker/base_worker.py index 18e6637..ab5d2cf 100644 --- a/data_chain/apps/base/task/worker/base_worker.py +++ b/data_chain/apps/base/task/worker/base_worker.py @@ -1,6 +1,9 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
import uuid - +import importlib +import os +import sys +from pathlib import Path from data_chain.apps.base.task.process_handler import ProcessHandler from data_chain.config.config import config from data_chain.entities.enum import TaskStatus @@ -21,7 +24,9 @@ class BaseWorker: @staticmethod def find_worker_class(worker_name): subclasses = BaseWorker.__subclasses__() + print(subclasses) for subclass in subclasses: + print(subclass.name) if subclass.name == worker_name: return subclass return None diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index a5fd830..8718556 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -33,7 +33,7 @@ from data_chain.stores.mongodb.mongodb import Task class ParseDocumentWorker(BaseWorker): - name = TaskType.DOC_PARSE + name = TaskType.DOC_PARSE.value @staticmethod async def init(doc_id: uuid.UUID) -> uuid.UUID: diff --git a/data_chain/apps/router/document.py b/data_chain/apps/router/document.py index 84bd8b1..0f74bf1 100644 --- a/data_chain/apps/router/document.py +++ b/data_chain/apps/router/document.py @@ -35,7 +35,7 @@ async def list_doc( action: Annotated[str, Depends(get_route_info)], req: Annotated[ListDocumentRequest, Body()] ): - if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, action)): + if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): raise Exception("用户没有权限访问该知识库的文档") list_document_msg = await DocumentService.list_doc(req) return ListDocumentResponse(result=list_document_msg) @@ -86,7 +86,7 @@ async def upload_docs( docs: list[UploadFile] = File(...)): if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action)): raise Exception("用户没有权限上传文档到该知识库") - doc_ids = await DocumentService.upload_docs(kb_id, docs) + doc_ids = await 
DocumentService.upload_docs(user_sub, kb_id, docs) return UploadDocumentResponse(result=doc_ids) diff --git a/data_chain/apps/router/knowledge_base.py b/data_chain/apps/router/knowledge_base.py index 10ceebc..e03af2d 100644 --- a/data_chain/apps/router/knowledge_base.py +++ b/data_chain/apps/router/knowledge_base.py @@ -61,8 +61,8 @@ async def list_doc_types_by_kb_id( ): if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): raise Exception("用户没有权限访问该知识库的文档类型") - list_doc_types_msg = await KnowledgeBaseService.list_doc_types_by_kb_id(kb_id) - return ListDocumentTypesResponse(result=list_doc_types_msg) + doc_types = await KnowledgeBaseService.list_doc_types_by_kb_id(kb_id) + return ListDocumentTypesResponse(result=doc_types) @router.get('/download', dependencies=[Depends(verify_user)]) @@ -129,7 +129,7 @@ async def export_kb_by_kb_ids( for kb_id in kb_ids: if not await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action): raise Exception("用户没有权限在该知识库导出知识库") - kb_export_task_ids = KnowledgeBaseService.export_kb_by_kb_ids(kb_ids) + kb_export_task_ids = await KnowledgeBaseService.export_kb_by_kb_ids(kb_ids) return ExportKnowledgeBaseResponse(result=kb_export_task_ids) diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 821dcf0..833e9ec 100644 --- a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -112,13 +112,9 @@ class ChunkService: if req.is_classify_by_doc: doc_chunks = await BaseSearcher.classify_by_doc_id(chunk_entities) for doc_chunk in doc_chunks: - dc = DocChunk(doc_id=doc_chunk.doc_id, doc_name=doc_chunk.doc_name, chunks=[]) - for chunk_entity in doc_chunk.chunks: - chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + for chunk in doc_chunk.chunks: if req.is_compress: chunk.text = TokenTool.compress_tokens(chunk.text) - dc.chunks.append(chunk) - 
search_chunk_msg.docChunks.append(dc) else: for chunk_entity in chunk_entities: chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 7f7fc8a..1243f5c 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -178,7 +178,7 @@ class DocumentService: logging.error("[DocumentService] %s", err) continue index = 0 - while len(index) < len(doc_entities): + while index < len(doc_entities): try: await DocumentManager.add_documents(doc_entities[index:index+1024]) index += 1024 diff --git a/data_chain/apps/service/knwoledge_base_service.py b/data_chain/apps/service/knwoledge_base_service.py index fe1fb97..874a716 100644 --- a/data_chain/apps/service/knwoledge_base_service.py +++ b/data_chain/apps/service/knwoledge_base_service.py @@ -8,6 +8,7 @@ from data_chain.logger.logger import logger as logging from data_chain.entities.request_data import ( ListTeamRequest, CreateKnowledgeBaseRequest, + DocumentType as DocumentTypeRequest, ListKnowledgeBaseRequest, UpdateKnowledgeBaseRequest ) @@ -15,6 +16,7 @@ from data_chain.entities.response_data import ( TeamKnowledgebase, ListAllKnowledgeBaseMsg, Team, + DocumentType as DocumentTypeResponse, ListKnowledgeBaseMsg, ListDocumentTypesResponse) from data_chain.apps.base.zip_handler import ZipHandler @@ -58,8 +60,7 @@ class KnowledgeBaseService: """列出知识库""" try: # 获取用户所在团队 - team_entities = await TeamManager.list_team_mycreated_user_sub(user_sub) - team_entities += await TeamManager.list_team_myjoined_by_user_sub(user_sub) + team_entities = await TeamManager.list_all_team_user_created_or_joined(user_sub) team_ids = [team_entity.id for team_entity in team_entities] # 获取知识库 knowledge_base_entities = await KnowledgeBaseManager.list_knowledge_base_by_team_ids(team_ids, kb_name) @@ -78,8 +79,16 @@ class KnowledgeBaseService: kbList=[] ) for knowledge_base_entity in 
knowledge_base_entities: + doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(knowledge_base_entity.id) + doc_types = [] + for doc_type_entity in doc_type_entities: + doc_types.append( + (await Convertor.convert_document_type_entity_to_document_type_response(doc_type_entity)) + ) + knowledge_base = await Convertor.convert_knowledge_base_entity_to_knowledge_base(knowledge_base_entity) + knowledge_base.doc_types = doc_types team_knowledge_base.kb_list.append( - Convertor.convert_knowledge_base_entity_to_knowledge_base(knowledge_base_entity) + knowledge_base ) team_knowledge_bases.append(team_knowledge_base) return ListAllKnowledgeBaseMsg(teamKnowledgebases=team_knowledge_bases) @@ -97,8 +106,15 @@ class KnowledgeBaseService: knowledge_bases = [] for knowledge_base_entity in knowledge_base_entities: doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(knowledge_base_entity.id) + doc_types = [] + for doc_type_entity in doc_type_entities: + doc_types.append( + (await Convertor.convert_document_type_entity_to_document_type_response(doc_type_entity)) + ) + knowledge_base = await Convertor.convert_knowledge_base_entity_to_knowledge_base(knowledge_base_entity) + knowledge_base.doc_types = doc_types knowledge_bases.append( - (await Convertor.convert_knowledge_base_entity_to_knowledge_base(knowledge_base_entity, doc_type_entities)) + knowledge_base ) return ListKnowledgeBaseMsg(total=total, kbList=knowledge_bases) except Exception as e: @@ -107,7 +123,7 @@ class KnowledgeBaseService: raise e @staticmethod - async def list_doc_types_by_kb_id(kb_id: uuid.UUID) -> ListDocumentTypesResponse: + async def list_doc_types_by_kb_id(kb_id: uuid.UUID) -> list[DocumentTypeResponse]: """列出知识库文档类型""" try: # 获取文档类型 @@ -117,7 +133,7 @@ class KnowledgeBaseService: document_types.append( (await Convertor.convert_document_type_entity_to_document_type_response(document_type_entity)) ) - return ListDocumentTypesResponse(documentTypes=document_types) + return 
document_types except Exception as e: err = "列出知识库文档类型失败" logging.exception("[KnowledgeBaseService] %s", err) @@ -255,9 +271,9 @@ class KnowledgeBaseService: return kb_export_task_ids @staticmethod - async def update_doc_types(kb_id: uuid.UUID, doc_types: list[dict]) -> None: - new_doc_type_map = {uuid.UUID(doc_type['doc_type_id']): doc_type['doc_type_name'] for doc_type in doc_types} - new_doc_type_ids = {uuid.UUID(doc_type['doc_type_id']) for doc_type in doc_types} + async def update_doc_types(kb_id: uuid.UUID, doc_types: list[DocumentTypeRequest]) -> None: + new_doc_type_map = {doc_type.doc_type_id: doc_type.doc_type_name for doc_type in doc_types} + new_doc_type_ids = {doc_type.doc_type_id for doc_type in doc_types} old_doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(kb_id) old_doc_type_ids = {doc_type_entity.id for doc_type_entity in old_doc_type_entities} delete_doc_type_ids = old_doc_type_ids - new_doc_type_ids @@ -286,14 +302,12 @@ class KnowledgeBaseService: """更新知识库""" try: knowledge_base_dict = await Convertor.convert_update_knowledge_base_request_to_dict(req) - doc_types = knowledge_base_dict.get("doc_types", []) - del knowledge_base_dict["doc_types"] knowledge_base_entity = await KnowledgeBaseManager.update_knowledge_base_by_kb_id(kb_id, knowledge_base_dict) if knowledge_base_entity is None: err = "更新知识库失败" logging.exception("[KnowledgeBaseService] %s", err) raise e - await KnowledgeBaseService.update_doc_types(kb_id, doc_types) + await KnowledgeBaseService.update_doc_types(kb_id, req.doc_types) return knowledge_base_entity.id except Exception as e: err = "更新知识库失败" @@ -310,7 +324,7 @@ class KnowledgeBaseService: if task_entity is not None: await TaskQueueService.stop_task(task_entity.id) await KnowledgeBaseManager.update_knowledge_base_by_kb_id( - kb_id, {"status": KnowledgeBaseStatus.DELETED}) + kb_id, {"status": KnowledgeBaseStatus.DELETED.value}) kb_ids_deleted.append(kb_id) except Exception as e: err = "删除知识库失败" diff --git 
a/data_chain/entities/common.py b/data_chain/entities/common.py index b826b46..395abdd 100644 --- a/data_chain/entities/common.py +++ b/data_chain/entities/common.py @@ -44,6 +44,8 @@ actions = [ 'name': '获取文档列表', 'action': 'POST /doc/list'}, {'type': 'document', 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '获取文档报告', 'action': 'GET /doc/report'}, {'type': 'document', 'name': '创建文档', 'action': 'POST /doc'}, {'type': 'document', @@ -99,7 +101,9 @@ actions = [ {'type': 'task', 'name': '获取任务报告', 'action': 'GET /task/report'}, {'type': 'task', - 'name': '删除任务', 'action': 'DELETE /task'}] + 'name': '删除单个任务', 'action': 'DELETE /task/one'}, + {'type': 'task', + 'name': '删除单个任务', 'action': 'DELETE /task/all'}] default_roles = [ { "id": uuid.UUID("00000000-0000-0000-0000-000000000001"), @@ -148,6 +152,8 @@ default_roles = [ 'name': '获取文档列表', 'action': 'POST /doc/list'}, {'type': 'document', 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '获取文档报告', 'action': 'GET /doc/report'}, {'type': 'document', 'name': '创建文档', 'action': 'POST /doc'}, {'type': 'document', @@ -203,7 +209,9 @@ default_roles = [ {'type': 'task', 'name': '获取任务报告', 'action': 'GET /task/report'}, {'type': 'task', - 'name': '删除任务', 'action': 'DELETE /task'}], + 'name': '删除单个任务', 'action': 'DELETE /task/one'}, + {'type': 'task', + 'name': '删除单个任务', 'action': 'DELETE /task/all'}], "editable": False, }, { "id": uuid.UUID("00000000-0000-0000-0000-000000000002"), @@ -246,6 +254,8 @@ default_roles = [ 'name': '获取文档列表', 'action': 'POST /doc/list'}, {'type': 'document', 'name': '下载文档', 'action': 'GET /doc/download'}, + {'type': 'document', + 'name': '获取文档报告', 'action': 'GET /doc/report'}, {'type': 'document', 'name': '创建文档', 'action': 'POST /doc'}, {'type': 'document', @@ -295,7 +305,9 @@ default_roles = [ {'type': 'task', 'name': '获取任务报告', 'action': 'GET /task/report'}, {'type': 'task', - 'name': '删除任务', 'action': 'DELETE /task'}], + 'name': '删除单个任务', 
'action': 'DELETE /task/one'}, + {'type': 'task', + 'name': '删除单个任务', 'action': 'DELETE /task/all'}], "editable": False, }, { "id": uuid.UUID("00000000-0000-0000-0000-000000000003"), diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index 6feceeb..f363e07 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -60,8 +60,8 @@ class UpdateTeamRequest(BaseModel): class DocumentType(BaseModel): - doc_type_id: uuid.UUID - doc_type_name: str = Field(default='这是一个默认的文档类型名称', min_length=1, max_length=20) + doc_type_id: uuid.UUID = Field(default=DEFAULt_DOC_TYPE_ID, description="文档类型的id", alias="docTypeId") + doc_type_name: str = Field(default='这是一个默认的文档类型名称', min_length=1, max_length=20, alias="docTypeName") class ListKnowledgeBaseRequest(BaseModel): @@ -214,8 +214,9 @@ class CreateTestingRequest(BaseModel): description: str = Field(default='', description="测试简介", max_length=200) dataset_id: uuid.UUID = Field(description="测试数据集id", alias="datasetId") llm_id: str = Field(description="测试使用的大模型id", alias="llmId") - search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_EMBEDDING, + search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_AND_VECTOR, description="测试使用的检索方法", alias="searchMethod") + top_k: int = Field(default=5, description="测试中检索方法关联的片段数量", alias="topK") class UpdateTestingRequest(BaseModel): @@ -223,8 +224,9 @@ class UpdateTestingRequest(BaseModel): min_length=1, max_length=150, alias="testingName") description: str = Field(default='', description="测试简介", max_length=200) llm_id: str = Field(description="测试使用的大模型id", alias="llmId") - search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_EMBEDDING, + search_method: SearchMethod = Field(default=SearchMethod.KEYWORD_AND_VECTOR, description="测试使用的检索方法", alias="searchMethod") + top_k: int = Field(default=5, description="测试中检索方法关联的片段数量", alias="topK") class ListRoleRequest(BaseModel): diff --git 
a/data_chain/manager/knowledge_manager.py b/data_chain/manager/knowledge_manager.py index e1e9e7a..56d3eac 100644 --- a/data_chain/manager/knowledge_manager.py +++ b/data_chain/manager/knowledge_manager.py @@ -29,7 +29,7 @@ class KnowledgeBaseManager(): try: async with await DataBase.get_session() as session: stmt = select(KnowledgeBaseEntity).where(and_(KnowledgeBaseEntity.id == kb_id, - KnowledgeBaseEntity.status != KnowledgeBaseStatus.DEELTED)) + KnowledgeBaseEntity.status != KnowledgeBaseStatus.DELETED.value)) result = await session.execute(stmt) knowledge_base_entity = result.scalars().first() return knowledge_base_entity diff --git a/data_chain/manager/team_manager.py b/data_chain/manager/team_manager.py index 9efe572..ebee5a1 100644 --- a/data_chain/manager/team_manager.py +++ b/data_chain/manager/team_manager.py @@ -84,6 +84,25 @@ class TeamManager: logging.exception("[TeamManager] %s", err) raise e + @staticmethod + async def list_all_team_user_created_or_joined(user_sub: str) -> list[TeamEntity]: + """列出我创建或加入的团队""" + try: + async with await DataBase.get_session() as session: + stmt = select(TeamEntity).where(and_( + TeamEntity.author_id == user_sub, TeamEntity.status != TeamStatus.DELETED.value)) + result = await session.execute(stmt) + team_entities = result.scalars().all() + stmt = select(TeamEntity).join(TeamUserEntity, TeamEntity.id == TeamUserEntity.team_id).where( + and_(TeamUserEntity.user_id == user_sub, TeamEntity.status != TeamStatus.DELETED.value)) + result = await session.execute(stmt) + team_entities += result.scalars().all() + return team_entities + except Exception as e: + err = "列出我创建或加入的团队失败" + logging.exception("[TeamManager] %s", err) + raise e + @staticmethod async def list_pulic_team(req: ListTeamRequest) -> list[TeamEntity]: """列出公开的团队""" diff --git a/data_chain/parser/tools/token_tool.py b/data_chain/parser/tools/token_tool.py index a96bcb2..32c75ca 100644 --- a/data_chain/parser/tools/token_tool.py +++ 
b/data_chain/parser/tools/token_tool.py @@ -445,7 +445,7 @@ class TokenTool: return score @staticmethod - def cal_leve(str1: str, str2: str2) -> float: + def cal_leve(str1: str, str2: str) -> float: """ 计算两个字符串的编辑距离 """ diff --git a/data_chain/rag/base_searcher.py b/data_chain/rag/base_searcher.py index c65f6c1..f0b94b7 100644 --- a/data_chain/rag/base_searcher.py +++ b/data_chain/rag/base_searcher.py @@ -3,18 +3,11 @@ import uuid from pydantic import BaseModel, Field import random from data_chain.logger.logger import logger as logging +from data_chain.apps.base.convertor import Convertor from data_chain.stores.database.database import ChunkEntity from data_chain.parser.tools.token_tool import TokenTool from data_chain.manager.chunk_manager import ChunkManager - - -class DocChunk(BaseModel): - """ - 文档块 - """ - doc_id: uuid.UUID = Field(..., description="文档ID") - doc_name: str = Field(..., description="文档名称") - chunks: list[ChunkEntity] = Field(..., description="文档块列表") +from data_chain.entities.response_data import Chunk, DocChunk class BaseSearcher: @@ -152,11 +145,11 @@ class BaseSearcher: :return: 分类后的结果 """ doc_chunk_dict = {} + chunk_entities = sorted(chunk_entities, key=lambda x: x.global_offset) for chunk_entity in chunk_entities: if chunk_entity.doc_id not in doc_chunk_dict: doc_chunk_dict[chunk_entity.doc_id] = DocChunk( doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[]) - doc_chunk_dict[chunk_entity.doc_id].chunks.append(chunk_entity) - for doc_chunk in doc_chunk_dict.values(): - doc_chunk.chunks = sorted(doc_chunk.chunks, key=lambda x: x.global_offset) + chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + doc_chunk_dict[chunk_entity.doc_id].chunks.append(chunk) return list(doc_chunk_dict.values()) diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index 3f81485..b247dfd 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -431,7 
+431,8 @@ class TestingEntity(Base): name = Column(String) # 测试任务的名称 description = Column(String) # 测试任务的描述 llm_id = Column(String) # 测试任务的使用的大模型 - search_method = Column(String, default=SearchMethod.KEYWORD_EMBEDDING.value) # 测试任务的使用的检索增强模式类型 + search_method = Column(String, default=SearchMethod.KEYWORD_AND_VECTOR.value) # 测试任务的使用的检索增强模式类型 + top_k = Column(Integer, default=5) # 测试任务的检索增强模式的top_k status = Column(String, default=TestingStatus.IDLE.value) # 测试任务的状态 ave_score = Column(Float) # 测试任务的综合得分 ave_pre = Column(Float) # 测试任务的平均召回率 diff --git a/data_chain/stores/mongodb/mongodb.py b/data_chain/stores/mongodb/mongodb.py index 3fdd18d..02d58b7 100644 --- a/data_chain/stores/mongodb/mongodb.py +++ b/data_chain/stores/mongodb/mongodb.py @@ -8,7 +8,7 @@ from typing import TYPE_CHECKING import uuid from data_chain.config.config import config -from data_chain.logging.logging import logging as logging +from data_chain.logger.logger import logger as logging class Session(BaseModel): diff --git a/openapi.json b/openapi.json index f1e5934..0652e76 100644 --- a/openapi.json +++ b/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team 
Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team 
Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete 
Kb By Kb Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc 
Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By 
Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In 
Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have 
Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search 
Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create 
Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing 
Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg 
Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb 
Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result"
,"description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_embedding"}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"}},"type":"object","required":["dataId","docName","question","answer","chunk"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/compon
ents/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","docTask","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"doc_type_id":{"type":"string","
format":"uuid","title":"Doc Type Id"},"doc_type_name":{"type":"string","maxLength":20,"minLength":1,"title":"Doc Type Name","default":"这是一个默认的文档类型名称"}},"type":"object","required":["doc_type_id"],"title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 
响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 
响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表",
"default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","title":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 
响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"type":{"anyOf":[{"$ref":"#/components/schemas/ChunkType"},{"type":"null"}],"description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 响应"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 
响应"},"SearchMethod":{"type":"string","enum":["keyword","embedding","keyword_embedding","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":
"Teamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 
数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","minLength":1,"title":"Text","description":"分块文本内容"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","required":["text"],"title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_embedding"}},"type":"object","required":["datasetId","llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have 
Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search 
Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create 
Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing 
Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg 
Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb 
Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result"
,"description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"}},"type":"object","required":["dataId","docName","question","answer","chunk"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/compon
ents/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","docTask","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid
"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"type":{"anyOf":[{"$ref":"#/components/schemas/ChunkType"},{"type":"null"}],"description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 
数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","minLength":1,"title":"Text","description":"分块文本内容"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","required":["text"],"title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file diff --git a/witchaind_doc/C3-Art0002.txt b/witchaind_doc/C3-Art0002.txt new file mode 100644 index 0000000..748a9f1 --- /dev/null +++ b/witchaind_doc/C3-Art0002.txt @@ -0,0 +1,71 @@ +【 文献号 】1-2432 +【原文出处】出版发行研究 +【原刊地名】京 +【原刊期号】199601 +【原刊页号】40-41 +【分 类 号】Z1 +【分 类 名】出版工作、图书评介 +【 作 者 】王益 +【复印期号】199604 +【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》 +【 正 文 】 + 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo +f Book Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林 +斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199 +0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的 +基本原则仍是正确的,因而出版时基本上没有修改。这本书是作者长期实践经验的总结,他也从管理科学和财会科学书籍中吸取了营养,并听取 +了其他出版家的意见,它是写给出版社的社长们读的,也是写给出版社所有的工作人员读的。有关出版工作的方方面面,它都涉及到了。他对各 +项工作的甜酸苦辣好像都有切身的体会,他了解其中的主要矛盾和麻烦,并对如何解决这些问题提出了很好的建议。他看问题全面、客观,立论 +公正,处处迸发出智慧的火花。凡读过这本书的人,都会感到得益。美国《出版商周刊》曾在书评中把它誉为“出版业经营管理方面不可缺少的 +有说服力的研究著作”。 + 本书从论述出版工作中的理性和非理性开始,作者认为,出版社的经营管理者是一个有理性的人,在理性的环境中与有理性的人们一道工作 +,追求可能是复杂的但至少可以明确表示的目标,而整个出版活动,又沉浸在非理性的大海中。经营管理必须把非理性因素也考虑进去,而不能 
+企图将其纳入既定秩序的框架之内,对非理性因素,要在内部和外部和它生活、工作在一起,参与进去,并且试图理解它,甚至促进它、鼓励它 +、批评它,把它和出版社的健康现象联系起来。作者认为,非理性也是一种重要的动力,许多培育出版业成长壮大的创造力来自非理性,来自作 +者的下意识思想的活动和要求;我们称之为市场的读者群同样也被他们特有的下意识的思想和要求所驱动。这些观点,对我们来说,非常陌生, +但可以姑妄听之,以便开阔我们的思路。作者在随后部分中论及的出版工作中的一些原则问题,例如出版的文化功能和企业经营的关系,出版业 +与其他行业的异同,盈利性出版社与非盈利出版社的区别,社会效益与经济效益的关系,质量与数量的关系,出版社与作者的关系,出版社中领 +导与被领导的关系,出版社中部门与部门之间的沟通与协作等,其观点倒是我们很熟悉而且容易理解的;甚至有些观点与我们的看法是相似的。 + 出版社的经营管理相当复杂,既有生产,又有销售,既有精神生产,又有物质生产,而且精神生产还是主要的。要熟悉出版社的全盘业务很 +不容易,但作者做到了这一点。他出身于编辑,认为编辑工作也在经营管理的范围之内,但在本书中,他对编辑工作却着墨不多。他强调编辑工 +作的重要性,认为出版史是出版了杰出的书的杰出出版社的历史;出版社所以出名,是因为出版了杰出的著作,而不是由于经营技巧的高明,当 +然他并不忽视经营管理的重要性,本书毕竟绝大部分篇幅是谈生产、销售、人事、财务等方面的事情。 + 作者精通出版社的所有各项业务,并不是洞察出版社所有各项业务的细节,而是对各项业务都有深刻的了解,能够进行十分精辟的分析。他 +注意出书的系统,认为出版社的全部出版物应该实际上是一套或若干套丛书。出书要十分注意质量。质量第一,但并非质量是唯一。有些书即使 +是能赚钱的好书,但如果不符合出版社出书的兴趣、品位和专业范围,也不应该出版,以免有损于出版社的形象。编辑要时刻想到是为作者出书 +而不是为自己出书。编辑在作者面前代表出版社但要注意到自己不是出版社的老板。美术设计必须在经济实用的约束下去发掘美的特性,一本精 +美图书的特性就在于它能体现书的主题和作者的意向。设计人是应该懂得图书不过是他设计的一个对象,书是给读者阅读的,读者的主要兴趣在 +书的内容。生产部(我们一般称为出版部)主要职责在于恰当处理质量、进度、成本三个相互冲突的目标,他们只提好、快、省,不提多、快、 +好、省。他们认为追逐品种多,必然会降低质量,出版社不论大小,都要编制出版日程表,设专人监督出版工作的进度。许多出版社的重印周期 +是两年或两年以内,一般不会生产超过三年以上的供应量,专业书和学术著作出版社不在此例,必须高一些。每个出版社都应结合自己的特点, +建立自己的发行渠道。销售量的变化,与推销工作的关系很密切,每达到一个新的销售水平,必须支出相应的推销费用。推销工作达到一定程度 +,应考虑支出的推销费用能否达到增加销货的目的。定价是由市场决定的,参考市场情况由出版社规定定价,定价与成本有一定联系,但在市场 +经济条件下,不存在简单的根据成本规定定价的公式。定价高也能卖掉一定数量的书,取得一定的利润;降低定价,到一定程度,也会饱和,未 +必能卖掉更多的书,甚至还会遭致亏损。出版社是盈是亏,应该是可以计算的,并有客观标准的,但也可以有主观成分。盈亏决定于资产和负债 +,但某些资产的实际价值是估计的。决算表所反映的情况,不一定真实,可能是虚亏实盈,也可能虚盈实亏。出版社要注意现金流通,经常在手 +头掌握一定的现金,否则即使是赚钱的出版社,如果周转不灵,也会带来极大的麻烦,但手头积攒太多的现金又是不智的。 + 本书提供了五种不同类型的出版社的典型的损益表,用百分比说明了成本、各种经营费用、毛利、税负、纯利润等之间的比例关系,还开列 +了一本具体的书的经营计划表,用金额说明了实际经营情况,使我们获得了明确而清晰的信息,用来与我们的经营情况进行对比,是非常有价值 +的。书中提出了5 个评估出版社财务情况是否健康的比率(流动资产:流动负债,速兑资产:短期债务,年净销售额:库存额,周转资本:库存 +额,年净销售额:应收帐款额),作为最低限度要求的指标或危险信号,也极有参考价值,本书附录的20多种报表,介绍了一个管理健全的出版 +社,应该有哪些在部门与部门之间沟通信息的工具,很有用。 + 本书用了相当大的篇幅(约1/5),用了许多公式、方程式、 图表,试图科学地解决最佳印数和最佳定价这两个重要而棘手的问题。但只能 +说是一次有益的尝试,是一项有意义的研究成果,其实用价值还是有限的。书中开出的公式以年销售率(第一年销售多少册,以后每年能各销售 
+多少册)为基础,销售率只能估计而不能计算出来(教科书和某些有明确对象的专业用书或学习用书例外),而估计很难准确,因为需求充满不 +确定因素。这部分技术性很强,对于方程式和图解法不熟悉的读者来说,要读懂它有一定困难,如果翻译不准确且排印有错误则更为困难。实在 +读不懂的话,可以略去不读,或者只注意作者提出的一些结论性的意见,了解各种因素之间的关系就行了。否则花费了大量时间,而得不到实际 +效果。对我国出版社来讲,当务之急是建立销售记录(最好利用计算机),掌握各类图书历年的销售情况。这是科学管理必不可少的工具,有了 +它,才谈得上研究最佳印数和最佳定价。 + 本书第五章最后一节《新技术的影响》和《新技术》,描绘了电子计算机技术高度发展和广泛应用后的出版业的景象。设想分两步走。第一 +步,出版社利用质量好效率高的自动复印机印书,一次即使印一册也合算,使出版社不以按订单印书(按需生产)。今天收订单,明天就发货。 +没有存货,也用不着计算最佳印数。没有积压,也不会脱销,因而也没有风险,这是很理想的。第二步,到那时,高产高效而价钱又便宜的家用 +电传复印机已像电视机那样普及,读者足不出户,在家中按一下电钮,就可以买到所需要的书,像现在不必去电影院在家中就可以看电影一样。 +出书难、买书难、卖书难的问题彻底解决了。作者预言25年后可能出现这种情况,现已到期,实际上并未实现。现在的情况仍和25年以前一样, +有些读者不愿购买某些学术杂志和专题论文集了,因为到图书馆去复印他所需要的文章要便宜得多。书中的这一部分还是饶有兴趣的,读者如果 +急于了解详细内容,不妨倒过来读,从最后一章读起。 + (本文作者单位:新闻出版署) + (本文责编:文依)* + + + + -- Gitee From a8c2c6f1d091740592d27eefea237dbdec23d1f4 Mon Sep 17 00:00:00 2001 From: zxstty Date: Tue, 13 May 2025 21:37:49 +0800 Subject: [PATCH 05/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=99=A8=E7=9A=84bug,=E4=BF=AE=E5=A4=8D=E8=B5=84=E4=BA=A7?= =?UTF-8?q?=E5=BA=93=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/app.py | 27 ++++++- data_chain/apps/base/convertor.py | 2 +- .../base/task/worker/acc_testing_worker.py | 4 +- .../apps/base/task/worker/base_worker.py | 11 +-- .../base/task/worker/export_dataset_worker.py | 4 +- .../worker/export_knowledge_base_worker.py | 2 +- .../task/worker/generate_dataset_worker.py | 4 +- .../base/task/worker/import_dataset_worker.py | 12 ++-- .../worker/import_knowledge_base_worker.py | 2 +- .../base/task/worker/parse_document_worker.py | 34 ++++----- data_chain/apps/base/zip_handler.py | 4 +- data_chain/apps/service/chunk_service.py | 2 +- data_chain/apps/service/document_service.py | 31 ++++++-- data_chain/apps/service/session_service.py | 1 + data_chain/entities/common.py | 2 +- data_chain/entities/response_data.py | 2 +- data_chain/manager/document_manager.py | 11 ++- 
data_chain/manager/document_type_manager.py | 2 +- data_chain/manager/knowledge_manager.py | 18 ++++- data_chain/manager/task_queue_mamanger.py | 2 +- data_chain/manager/user_manager.py | 2 +- data_chain/parser/handler/base_parser.py | 3 +- data_chain/parser/handler/docx_parser.py | 8 +-- data_chain/parser/handler/html_parser.py | 3 - data_chain/parser/handler/pdf_parser.py | 6 +- data_chain/parser/handler/pptx_parser.py | 3 +- data_chain/parser/tools/ocr_tool.py | 2 +- data_chain/parser/tools/token_tool.py | 2 +- data_chain/rag/base_searcher.py | 2 +- data_chain/rag/enhanced_by_llm_searcher.py | 1 - data_chain/stores/database/database.py | 16 ++--- data_chain/stores/mongodb/mongodb.py | 3 +- witchaind_doc/C3-Art0002.txt | 71 ------------------- 33 files changed, 145 insertions(+), 154 deletions(-) delete mode 100644 witchaind_doc/C3-Art0002.txt diff --git a/data_chain/apps/app.py b/data_chain/apps/app.py index 84e7b3d..4b8370a 100644 --- a/data_chain/apps/app.py +++ b/data_chain/apps/app.py @@ -19,6 +19,30 @@ from data_chain.apps.router import ( usr_message, task ) +from data_chain.apps.base.task.worker import ( + base_worker, + export_dataset_worker, + import_dataset_worker, + export_knowledge_base_worker, + import_knowledge_base_worker, + generate_dataset_worker, + acc_testing_worker, + parse_document_worker +) +from data_chain.parser.handler import ( + base_parser, + doc_parser, + docx_parser, + html_parser, + json_parser, + md_parser, + md_zip_parser, + pdf_parser, + pptx_parser, + txt_parser, + xlsx_parser, + yaml_parser, +) from data_chain.stores.database.database import DataBase, ActionEntity, DocumentTypeEntity from data_chain.manager.role_manager import RoleManager from data_chain.manager.document_type_manager import DocumentTypeManager @@ -47,7 +71,7 @@ async def startup_event(): await add_document_type() await init_path() scheduler.add_job(TaskQueueService.handle_tasks, 'interval', seconds=5) - + scheduler.start() async def add_acitons(): for action in 
actions: @@ -99,7 +123,6 @@ async def configure(): @app.get("/routes") def get_all_routes(action: Annotated[str, Depends(get_route_info)]): - print(action) route_info = [] for route in app.routes: if hasattr(route, "methods") and hasattr(route, "path"): diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 02e8f5d..2c713a1 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -359,7 +359,7 @@ class Convertor: try: task_completed = 0 if task_report is not None: - task_completed = task_report.current_stage/task_report.stage_cnt + task_completed = task_report.current_stage/task_report.stage_cnt*100 task = Task( opId=task_entity.op_id, opName=task_entity.op_name, diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py b/data_chain/apps/base/task/worker/acc_testing_worker.py index f5312c4..398ae03 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -386,12 +386,12 @@ class TestingWorker(BaseWorker): if task_entity is None: err = f"[TestingWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) testing_entity = await TestingManager.get_testing_by_testing_id(task_entity.op_id) if testing_entity is None: err = f"[TestingWorker] 测试不存在,测试ID: {task_id}" logging.exception(err) - raise err + raise Exception(err) current_stage = 0 stage_cnt = 4 llm = LLM( diff --git a/data_chain/apps/base/task/worker/base_worker.py b/data_chain/apps/base/task/worker/base_worker.py index ab5d2cf..3e886a2 100644 --- a/data_chain/apps/base/task/worker/base_worker.py +++ b/data_chain/apps/base/task/worker/base_worker.py @@ -3,6 +3,7 @@ import uuid import importlib import os import sys +import inspect from pathlib import Path from data_chain.apps.base.task.process_handler import ProcessHandler from data_chain.config.config import config @@ -24,9 +25,7 @@ class BaseWorker: @staticmethod def 
find_worker_class(worker_name): subclasses = BaseWorker.__subclasses__() - print(subclasses) for subclass in subclasses: - print(subclass.name) if subclass.name == worker_name: return subclass return None @@ -54,8 +53,9 @@ class BaseWorker: worker_name = await BaseWorker.get_worker_name(task_id) flag = await (BaseWorker.find_worker_class(worker_name).reinit(task_id)) task_entity = await TaskManager.get_task_by_task_id(task_id) + ProcessHandler.remove_task(task_id) if flag: - TaskManager.update_task_by_id(task_id, {"status": TaskStatus.PENDING.value, "retry": task_entity.retry + 1}) + await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.PENDING.value, "retry": task_entity.retry + 1}) return True else: await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.FAILED.value}) @@ -65,6 +65,7 @@ class BaseWorker: async def deinit(task_id: uuid.UUID) -> uuid.UUID: '''析构任务''' worker_name = await BaseWorker.get_worker_name(task_id) + ProcessHandler.remove_task(task_id) await (BaseWorker.find_worker_class(worker_name).deinit(task_id)) await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.SUCCESS.value}) @@ -72,7 +73,7 @@ class BaseWorker: async def run(task_id: uuid.UUID) -> bool: '''运行任务''' worker_name = await BaseWorker.get_worker_name(task_id) - flag = ProcessHandler.add_task(BaseWorker.find_worker_class(worker_name).run, task_id) + flag = ProcessHandler.add_task(task_id, BaseWorker.find_worker_class(worker_name).run, task_id) await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.RUNNING.value}) return flag @@ -82,7 +83,7 @@ class BaseWorker: worker_name = await BaseWorker.get_worker_name(task_id) task_entity = await TaskManager.get_task_by_task_id(task_id) if task_entity.status == TaskStatus.RUNNING.value: - await ProcessHandler.remove_task(task_id) + ProcessHandler.remove_task(task_id) elif task_entity.status == TaskStatus.PENDING.value: await TaskQueueManager.delete_task_by_id(task_id) else: diff --git 
a/data_chain/apps/base/task/worker/export_dataset_worker.py b/data_chain/apps/base/task/worker/export_dataset_worker.py index 0b85b62..6b3b5d9 100644 --- a/data_chain/apps/base/task/worker/export_dataset_worker.py +++ b/data_chain/apps/base/task/worker/export_dataset_worker.py @@ -151,12 +151,12 @@ class ExportDataSetWorker(BaseWorker): if task_entity is None: err = f"[ExportDataSetWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) dataset_entity = await DatasetManager.get_dataset_by_dataset_id(task_entity.op_id) if dataset_entity is None: err = f"[ExportDataSetWorker] 数据集不存在,数据集ID: {task_id}" logging.exception(err) - raise err + raise Exception(err) current_stage = 0 stage_cnt = 3 tmp_path, source_path, target_path = await ExportDataSetWorker.init_path(task_id) diff --git a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py index 74f04ec..6945aec 100644 --- a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py @@ -173,7 +173,7 @@ class ExportKnowledgeBaseWorker(BaseWorker): if task_entity is None: err = f"[ExportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.EXPORTING.value}) current_stage = 0 stage_cnt = 6 diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index 8d2a34c..83c30c0 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -217,7 +217,7 @@ class GenerateDataSetWorker(BaseWorker): if task_entity is None: err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) llm 
= LLM( openai_api_key=config['OPENAI_API_KEY'], openai_api_base=config['OPENAI_API_BASE'], @@ -228,7 +228,7 @@ class GenerateDataSetWorker(BaseWorker): if dataset_entity is None: err = f"[GenerateDataSetWorker] 数据集不存在,数据集ID: {task_entity.op_id}" logging.exception(err) - raise err + raise Exception(err) await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.GENERATING.value}) current_stage = 0 stage_cnt = 3 diff --git a/data_chain/apps/base/task/worker/import_dataset_worker.py b/data_chain/apps/base/task/worker/import_dataset_worker.py index cac5221..6ecf2b6 100644 --- a/data_chain/apps/base/task/worker/import_dataset_worker.py +++ b/data_chain/apps/base/task/worker/import_dataset_worker.py @@ -102,9 +102,9 @@ class ImportDataSetWorker(BaseWorker): file_path = os.path.join(tmp_path, str(dataset_id)) if not os.path.exists(file_path): await MinIO.download_object( - bucket_name=IMPORT_DATASET_PATH_IN_MINIO, - object_name=str(dataset_id), - file_path=file_path + IMPORT_DATASET_PATH_IN_MINIO, + str(dataset_id), + file_path ) return file_path @@ -129,7 +129,7 @@ class ImportDataSetWorker(BaseWorker): if parser_result is None: err = f"[GenerateDataSetWorker] 解析文件失败,文件路径: {file_path},不支持的文件格式" logging.exception(err) - raise err + raise Exception(err) qa_entities = [] if extension == 'xlsx': nodes = parser_result.nodes @@ -213,7 +213,7 @@ class ImportDataSetWorker(BaseWorker): if task_entity is None: err = f"[ImportDataSetWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) llm = LLM( openai_api_key=config['OPENAI_API_KEY'], openai_api_base=config['OPENAI_API_BASE'], @@ -224,7 +224,7 @@ class ImportDataSetWorker(BaseWorker): if dataset_entity is None: err = f"[ImportDataSetWorker] 数据集不存在,数据集ID: {task_entity.op_id}" logging.exception(err) - raise err + raise Exception(err) await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.IMPORTING.value}) current_stage = 
0 stage_cnt = 3 diff --git a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py index 7790a44..2198bb3 100644 --- a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py @@ -193,7 +193,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): if task_entity is None: err = f"[ImportKnowledgeBaseWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.IMPORTING.value}) current_stage = 0 stage_cnt = 7 diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index 8718556..59b5118 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -43,14 +43,14 @@ class ParseDocumentWorker(BaseWorker): err = f"[ParseDocumentWorker] 文档不存在,doc_id: {doc_id}" logging.exception(err) raise None - await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.PENDING.value, "abstarct": "", "abstract_vector": None}) + await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None}) await ImageManager.update_images_by_doc_id(doc_id, {"status": ImageStatus.DELETED.value}) await ChunkManager.update_chunk_by_doc_id(doc_id, {"status": ChunkStatus.DELETED.value}) task_entity = TaskEntity( team_id=doc_entity.team_id, user_id=doc_entity.author_id, op_id=doc_entity.id, - op_namne=doc_entity.name, + op_name=doc_entity.name, type=TaskType.DOC_PARSE.value, retry=0, status=TaskStatus.PENDING.value) @@ -66,7 +66,7 @@ class ParseDocumentWorker(BaseWorker): logging.exception(err) return False doc_id = task_entity.op_id - await 
DocumentManager.update_document_by_doc_id(task_entity.op_id, {"abstarct": "", "abstract_vector": None}) + await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None}) await ImageManager.update_images_by_doc_id(doc_id, {"status": ImageStatus.DELETED.value}) await ChunkManager.update_chunk_by_doc_id(doc_id, {"status": ChunkStatus.DELETED.value}) tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id)) @@ -111,20 +111,20 @@ class ParseDocumentWorker(BaseWorker): if doc_entity is None: err = f"[ParseDocumentWorker] 文档不存在,doc_id: {doc_id}" logging.exception(err) - raise err - file_path = os.path.join(tmp_path, str(doc_id)+'.'+doc_entity.extension), + raise Exception(err) + file_path = os.path.join(tmp_path, str(doc_id)+'.'+doc_entity.extension) + print(file_path) await MinIO.download_object( - bucket_name=DOC_PATH_IN_MINIO, - object_name=str(doc_entity.id), - file_path=file_path, + DOC_PATH_IN_MINIO, + str(doc_entity.id), + file_path, ) return file_path @staticmethod - async def parse_doc(file_path: str) -> ParseResult: + async def parse_doc(doc_entity: DocumentEntity, file_path: str) -> ParseResult: '''解析文档''' - extension = os.path.splitext(file_path)[-1] - parse_result = await BaseParser.parser(extension, file_path) + parse_result = await BaseParser.parser(doc_entity.extension, file_path) return parse_result @staticmethod @@ -409,7 +409,7 @@ class ParseDocumentWorker(BaseWorker): status=ChunkStatus.EXISTED.value ) chunk_entities.append(chunk_entity) - if global_offset and node[global_offset].type != node[global_offset-1].type: + if global_offset and parse_result.nodes[global_offset].type != parse_result.nodes[global_offset-1].type: local_offset = 0 local_offset += 1 global_offset += 1 @@ -429,12 +429,12 @@ class ParseDocumentWorker(BaseWorker): if task_entity is None: err = f"[ParseDocumentWorker] 任务不存在,task_id: {task_id}" logging.exception(err) - raise err + raise Exception(err) doc_entity = 
await DocumentManager.get_document_by_doc_id(task_entity.op_id) if doc_entity is None: err = f"[ParseDocumentWorker] 文档不存在,doc_id: {task_entity.op_id}" logging.exception(err) - raise err + raise Exception(err) await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.RUNNING.value}) try: if doc_entity.parse_method == ParseMethod.EHANCED: @@ -453,7 +453,7 @@ class ParseDocumentWorker(BaseWorker): current_stage += 1 await ParseDocumentWorker.report(task_id, '下载文档', current_stage, stage_cnt) file_path = os.path.join(tmp_path, str(task_entity.op_id)+'.'+doc_entity.extension) - parse_result = await ParseDocumentWorker.parse_doc(file_path) + parse_result = await ParseDocumentWorker.parse_doc(doc_entity, file_path) current_stage += 1 await ParseDocumentWorker.report(task_id, '解析文档', current_stage, stage_cnt) await ParseDocumentWorker.handle_parse_result(parse_result, doc_entity, llm) @@ -484,7 +484,7 @@ class ParseDocumentWorker(BaseWorker): except Exception as e: err = f"[DocParseWorker] 任务失败,task_id: {task_id},错误信息: {e}" logging.exception(err) - await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.FAILED.value}) + await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.FAILED.value)) await ParseDocumentWorker.report(task_id, err, 0, 1) return None @@ -498,7 +498,7 @@ class ParseDocumentWorker(BaseWorker): return None await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.IDLE.value}) if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: - await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"abstarct": "", "abstract_vector": None}) + await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None}) await ImageManager.update_images_by_doc_id(task_entity.op_id, {"status": 
ImageStatus.DELETED.value}) await ChunkManager.update_chunk_by_doc_id(task_entity.op_id, {"status": ChunkStatus.DELETED.value}) tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id)) diff --git a/data_chain/apps/base/zip_handler.py b/data_chain/apps/base/zip_handler.py index 41ca1ee..d1a48e5 100644 --- a/data_chain/apps/base/zip_handler.py +++ b/data_chain/apps/base/zip_handler.py @@ -18,13 +18,13 @@ class ZipHandler(): if len(to_zip_file.filelist) > max_file_num: err = f"压缩文件{zip_file_path}的数量超过了上限" logging.error("[ZipHandler] %s", err) - raise err + raise Exception(err) for file in to_zip_file.filelist: total_size += file.file_size if total_size > max_file_size: err = f"压缩文件{zip_file_path}的尺寸超过了上限" logging.error("[ZipHandler] %s", err) - raise err + raise Exception(err) to_zip_file.namelist() for member in to_zip_file.infolist(): to_zip_file.open(member) diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 833e9ec..662e5b0 100644 --- a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -132,4 +132,4 @@ class ChunkService: except Exception as e: err = "更新分片失败" logging.exception("[ChunkService] %s", err) - raise err + raise Exception(err) diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 1243f5c..293a9b9 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -3,6 +3,7 @@ import aiofiles from fastapi import APIRouter, Depends, Query, Body, File, UploadFile import uuid import traceback +import shutil import os from data_chain.entities.request_data import ( ListDocumentRequest, @@ -69,9 +70,9 @@ class DocumentService: doc_entity, doc_type_entity) if doc_entity.id in task_dict.keys(): task_entity = task_dict[doc_entity.id] - task_report = task_report_dict.get(task_entity, None) + task_report = task_report_dict.get(task_entity.id, None) task = await 
Convertor.convert_task_entity_to_task(task_entity, task_report) - document.task = task + document.parse_task = task documents.append(document) list_document_msg = ListDocumentMsg(total=total, documents=documents) return list_document_msg @@ -140,19 +141,33 @@ class DocumentService: err = f"知识库不存在, 知识库ID: {kb_id}" logging.error("[DocumentService] %s", err) raise ValueError(err) + doc_cnt = len(docs) + doc_sz = 0 + for doc in docs: + doc_sz += doc.size + if doc_cnt > kb_entity.upload_count_limit or doc_sz > kb_entity.upload_size_limit*1024*1024: + err = f"上传文档数量或大小超过限制, 知识库ID: {kb_id}, 上传文档数量: {doc_cnt}, 上传文档大小: {doc_sz}" + logging.error("[DocumentService] %s", err) + raise ValueError(err) doc_entities = [] for doc in docs: try: - document_file_path = os.path.join(DOC_PATH_IN_OS, doc.filename) + id = uuid.uuid4() + tmp_path = os.path.join(DOC_PATH_IN_OS, str(id)) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + os.makedirs(tmp_path) + document_file_path = os.path.join(tmp_path, doc.filename) async with aiofiles.open(document_file_path, "wb") as f: content = await doc.read() await f.write(content) - id = uuid.uuid4() await MinIO.put_object( bucket_name=DOC_PATH_IN_MINIO, file_index=str(id), file_path=document_file_path ) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) doc_entity = DocumentEntity( id=id, team_id=kb_entity.team_id, @@ -161,7 +176,7 @@ class DocumentService: author_name=user_sub, name=doc.filename, extension=doc.filename.split('.')[-1], - size=len(content), + size=doc.size, parse_method=kb_entity.default_parse_method, parse_relut_topology=None, chunk_size=kb_entity.default_chunk_size, @@ -189,6 +204,7 @@ class DocumentService: for doc_entity in doc_entities: await TaskQueueService.init_task(TaskType.DOC_PARSE.value, doc_entity.id) doc_ids = [doc_entity.id for doc_entity in doc_entities] + await KnowledgeBaseManager.update_doc_cnt_and_doc_size(kb_id=kb_entity.id) return doc_ids @staticmethod @@ -233,6 +249,11 @@ class DocumentService: 
doc_entities = await DocumentManager.update_document_by_doc_ids( doc_ids, {"status": DocumentStatus.DELETED.value}) doc_ids = [doc_entity.id for doc_entity in doc_entities] + kb_entities = await KnowledgeBaseManager.list_kb_entity_by_doc_ids(doc_ids) + kb_ids = [kb_entity.id for kb_entity in kb_entities] + kb_ids = list(set(kb_ids)) + for kb_id in kb_ids: + await KnowledgeBaseManager.update_doc_cnt_and_doc_size(kb_id=kb_id) return doc_ids except Exception as e: err = "删除文档失败" diff --git a/data_chain/apps/service/session_service.py b/data_chain/apps/service/session_service.py index 8b492ab..1e00568 100644 --- a/data_chain/apps/service/session_service.py +++ b/data_chain/apps/service/session_service.py @@ -37,6 +37,7 @@ async def verify_user(request: HTTPConnection): async def get_user_sub(request: HTTPConnection) -> uuid: """从Session中获取用户""" + await UserManager.add_user((await Convertor.convert_user_sub_to_user_entity('admin'))) return "admin" if config["DEBUG"]: user_sub = "admin" diff --git a/data_chain/entities/common.py b/data_chain/entities/common.py index 395abdd..2e48ed2 100644 --- a/data_chain/entities/common.py +++ b/data_chain/entities/common.py @@ -354,7 +354,7 @@ EXPORT_DATASET_PATH_IN_MINIO = "witchaind-dataset-export" IMPORT_DATASET_PATH_IN_MINIO = "witchaind-dataset-import" TESTING_REPORT_PATH_IN_MINIO = "witchaind-testing-report" -DOC_PATH_IN_OS = "./witchaind_doc" +DOC_PATH_IN_OS = "./witchaind-doc" EXPORT_KB_PATH_IN_OS = "./witchaind-kb-export" IMPORT_KB_PATH_IN_OS = "./witchaind-kb-import" EXPORT_DATASET_PATH_IN_OS = "./witchaind-dataset-export" diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 953a257..09d1ee6 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -230,7 +230,7 @@ class Document(BaseModel): doc_type: DocumentType = Field(description="文档类型", alias="docType") chunk_size: int = Field(description="文档分片大小", alias="chunkSize") created_time: str = 
Field(description="文档创建时间", alias="createdTime") - parse_task: Optional[Task] = Field(description="文档任务", alias="docTask") + parse_task: Optional[Task] = Field(default=None, description="文档任务", alias="docTask") parse_method: ParseMethod = Field(description="文档解析方法", alias="parseMethod") enabled: bool = Field(description="文档是否启用", alias="enabled") author_name: str = Field(description="文档创建者的用户名", alias="authorName") diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index 4db614c..e36e544 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -32,7 +32,16 @@ class DocumentManager(): @staticmethod async def add_documents(document_entities: List[DocumentEntity]) -> List[DocumentEntity]: """批量添加文档""" - pass + try: + async with await DataBase.get_session() as session: + session.add_all(document_entities) + await session.commit() + for document_entity in document_entities: + await session.refresh(document_entity) + return document_entities + except Exception as e: + err = "批量添加文档失败" + logging.exception("[DocumentManager] %s", err) @staticmethod async def get_top_k_document_by_kb_id_vector( diff --git a/data_chain/manager/document_type_manager.py b/data_chain/manager/document_type_manager.py index ca422ea..db65b04 100644 --- a/data_chain/manager/document_type_manager.py +++ b/data_chain/manager/document_type_manager.py @@ -21,7 +21,7 @@ class DocumentTypeManager(): return document_type_entity except Exception as e: err = "添加文档类型失败" - logging.exception("[DocumentTypeManager] %s", err) + logging.error("[DocumentTypeManager] %s", err) @staticmethod async def add_document_types( diff --git a/data_chain/manager/knowledge_manager.py b/data_chain/manager/knowledge_manager.py index 56d3eac..3ff0573 100644 --- a/data_chain/manager/knowledge_manager.py +++ b/data_chain/manager/knowledge_manager.py @@ -86,6 +86,22 @@ class KnowledgeBaseManager(): logging.exception("[KnowledgeBaseManager] %s", err) 
raise e + @staticmethod + async def list_kb_entity_by_doc_ids(doc_ids: List[uuid.UUID]) -> List[KnowledgeBaseEntity]: + """根据文档ID获取知识库""" + try: + async with await DataBase.get_session() as session: + stmt = select(KnowledgeBaseEntity).join(DocumentEntity).where( + and_(DocumentEntity.id.in_(doc_ids), + DocumentEntity.status != DocumentStatus.DELETED.value)) + result = await session.execute(stmt) + knowledge_base_entities = result.scalars().all() + return knowledge_base_entities + except Exception as e: + err = "获取知识库失败" + logging.exception("[KnowledgeBaseManager] %s", err) + raise e + @staticmethod async def list_doc_types_by_kb_id(kb_id: uuid.UUID) -> List[DocumentTypeEntity]: """列出知识库文档类型""" @@ -127,7 +143,7 @@ class KnowledgeBaseManager(): result = await session.execute(stmt) doc_cnt, doc_size = result.first() stmt = update(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id).values( - document_count=doc_cnt, document_size=doc_size) + doc_cnt=doc_cnt, doc_size=doc_size) await session.execute(stmt) await session.commit() except Exception as e: diff --git a/data_chain/manager/task_queue_mamanger.py b/data_chain/manager/task_queue_mamanger.py index 4523e19..6917b98 100644 --- a/data_chain/manager/task_queue_mamanger.py +++ b/data_chain/manager/task_queue_mamanger.py @@ -41,7 +41,7 @@ class TaskQueueManager(): try: async with MongoDB.get_session() as session: task_colletion = MongoDB.get_collection('witchiand_task') - task = await task_colletion.find_one({"status": status.value}, sort=[("created_time", 1)], session=session) + task = await task_colletion.find_one({"status": status}, sort=[("created_time", 1)], session=session) return Task(**task) if task else None except Exception as e: err = "获取最早的任务失败" diff --git a/data_chain/manager/user_manager.py b/data_chain/manager/user_manager.py index 2277872..43391fe 100644 --- a/data_chain/manager/user_manager.py +++ b/data_chain/manager/user_manager.py @@ -18,5 +18,5 @@ class UserManager: return True except 
Exception as e: err = "用户添加失败" - logging.exception("[UserManger] %s", err) + logging.error("[UserManger] %s", err) return False diff --git a/data_chain/parser/handler/base_parser.py b/data_chain/parser/handler/base_parser.py index 113675c..72dd27c 100644 --- a/data_chain/parser/handler/base_parser.py +++ b/data_chain/parser/handler/base_parser.py @@ -7,6 +7,7 @@ class BaseParser: @staticmethod def find_worker_class(worker_name): subclasses = BaseParser.__subclasses__() + print(subclasses) for subclass in subclasses: if subclass.name == worker_name: return subclass @@ -43,4 +44,4 @@ class BaseParser: else: err = f"[BaseParser] 解析器不存在,parser_method: {parser_method}" logging.exception(err) - raise err + raise Exception(err) diff --git a/data_chain/parser/handler/docx_parser.py b/data_chain/parser/handler/docx_parser.py index 728aa6e..e4db34f 100644 --- a/data_chain/parser/handler/docx_parser.py +++ b/data_chain/parser/handler/docx_parser.py @@ -70,7 +70,7 @@ class DocxParser(BaseParser): else: err = "不支持的父元素类型" logging.exception("[DocxParser] %s", err) - raise err + raise Exception(err) nodes = [] for child in parent_elm.iterchildren(): @@ -189,7 +189,7 @@ class DocxParser(BaseParser): if not doc: err = "无法打开docx文件" logging.exception("[DocxParser] %s", err) - raise err + raise Exception(err) nodes = await DocxParser.docx_to_parse_nodes(doc) DocxParser.image_related_node_in_link_nodes(nodes) parse_result = ParseResult( @@ -197,7 +197,3 @@ class DocxParser(BaseParser): nodes=nodes ) return parse_result - - -nodes = asyncio.run(DocxParser.parser("通算融合算子收益分析.docx")) -print(nodes) diff --git a/data_chain/parser/handler/html_parser.py b/data_chain/parser/handler/html_parser.py index a71b19d..b0eacd5 100644 --- a/data_chain/parser/handler/html_parser.py +++ b/data_chain/parser/handler/html_parser.py @@ -181,12 +181,9 @@ class HTMLParser(BaseParser): async def parse(file_path) -> ParseResult: with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: html = 
file.read() - print(html) nodes = await HTMLParser.html_to_tree(html) return ParseResult( parse_topology_type=DocParseRelutTopology.TREE, nodes=nodes ) - -nodes = asyncio.run(HTMLParser.parse("example.html")) diff --git a/data_chain/parser/handler/pdf_parser.py b/data_chain/parser/handler/pdf_parser.py index 0161079..5cfe762 100644 --- a/data_chain/parser/handler/pdf_parser.py +++ b/data_chain/parser/handler/pdf_parser.py @@ -210,13 +210,9 @@ class PdfParser(BaseParser): sub_nodes_with_bbox, image_nodes_with_bbox) nodes_with_bbox.extend(sub_nodes_with_bbox) nodes = [node_with_bbox.node for node_with_bbox in nodes_with_bbox] - DocxParser.image_related_node_in_link_nodes(nodes) + PdfParser.image_related_node_in_link_nodes(nodes) parse_result = ParseResult( parse_topology_type=DocParseRelutTopology.GRAPH, nodes=nodes ) return parse_result - - -tmp = asyncio.run(PdfParser.parser("KOBA样册2022.pdf")) -print(tmp) diff --git a/data_chain/parser/handler/pptx_parser.py b/data_chain/parser/handler/pptx_parser.py index 5dcacff..98d7e76 100644 --- a/data_chain/parser/handler/pptx_parser.py +++ b/data_chain/parser/handler/pptx_parser.py @@ -95,7 +95,8 @@ class PptxParser(BaseParser): except Exception as e: err = "PPTX文件解析失败" logging.exception("[PptxParser] %s", err) - DocxParser.image_related_node_in_link_nodes(nodes) + nodes = await PptxParser.ppt_to_parse_nodes(pptx) + PptxParser.image_related_node_in_link_nodes(nodes) parse_result = ParseResult( parse_topology_type=DocParseRelutTopology.LIST, nodes=nodes diff --git a/data_chain/parser/tools/ocr_tool.py b/data_chain/parser/tools/ocr_tool.py index 8193f53..c888225 100644 --- a/data_chain/parser/tools/ocr_tool.py +++ b/data_chain/parser/tools/ocr_tool.py @@ -22,7 +22,7 @@ class OcrTool: @staticmethod async def ocr_from_image(image: np.ndarray) -> list: try: - ocr_result = await ocr_result.model.ocr(image, cls=True) + ocr_result = await OcrTool.model.ocr(image) if ocr_result is None or ocr_result[0] is None: return None return 
ocr_result diff --git a/data_chain/parser/tools/token_tool.py b/data_chain/parser/tools/token_tool.py index 32c75ca..3adc254 100644 --- a/data_chain/parser/tools/token_tool.py +++ b/data_chain/parser/tools/token_tool.py @@ -139,7 +139,7 @@ class TokenTool: def get_top_k_keywords(content: str, k=10) -> list: try: # 使用jieba提取关键词 - keywords = extract_tags(content, top_k=k, withWeight=True) + keywords = extract_tags(content, topK=k, withWeight=True) return [keyword for keyword, weight in keywords] except Exception as e: err = f"[TokenTool] 获取关键词失败 {e}" diff --git a/data_chain/rag/base_searcher.py b/data_chain/rag/base_searcher.py index f0b94b7..284ee34 100644 --- a/data_chain/rag/base_searcher.py +++ b/data_chain/rag/base_searcher.py @@ -38,7 +38,7 @@ class BaseSearcher: else: err = f"[BaseSearch] 检索器不存在,search_method: {search_method}" logging.exception(err) - raise err + raise Exception(err) @staticmethod async def rerank(chunk_entities: list[ChunkEntity], query: str) -> list[ChunkEntity]: diff --git a/data_chain/rag/enhanced_by_llm_searcher.py b/data_chain/rag/enhanced_by_llm_searcher.py index 066928d..aab7c04 100644 --- a/data_chain/rag/enhanced_by_llm_searcher.py +++ b/data_chain/rag/enhanced_by_llm_searcher.py @@ -65,4 +65,3 @@ class EnhancedByLLMSearcher(BaseSearcher): err = f"[KeywordVectorSearcher] 关键词向量检索失败,error: {e}" logging.exception(err) return [] - return chunk_entities diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index b247dfd..6a068f1 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -19,6 +19,7 @@ from data_chain.entities.enum import (Tokenizer, DocParseRelutTopology, DocumentStatus, ChunkType, + ImageStatus, ChunkParseTopology, DataSetStatus, TestingStatus, @@ -338,6 +339,7 @@ class ImageEntity(Base): doc_id = Column(UUID) # 图片所属文档id chunk_id = Column(UUID) # 图片所属chunk的id extension = Column(String) # 图片后缀 + status = Column(String, 
default=ImageStatus.EXISTED.value) # 图片状态 created_time = Column( TIMESTAMP(timezone=True), nullable=True, @@ -530,17 +532,15 @@ class TaskReportEntity(Base): class DataBase: - engine = None + engine = create_async_engine( + config['DATABASE_URL'], + echo=False, + pool_recycle=300, + pool_pre_ping=True + ) @classmethod async def init_all_table(cls): - if DataBase.engine is None: - DataBase.engine = create_async_engine( - config['DATABASE_URL'], - echo=False, - pool_recycle=300, - pool_pre_ping=True - ) if 'opengauss' in config['DATABASE_URL']: from opengauss_sqlalchemy.register_async import register_vector diff --git a/data_chain/stores/mongodb/mongodb.py b/data_chain/stores/mongodb/mongodb.py index 02d58b7..3b4d4be 100644 --- a/data_chain/stores/mongodb/mongodb.py +++ b/data_chain/stores/mongodb/mongodb.py @@ -48,7 +48,8 @@ class MongoDB: host = config['MONGODB_HOST'] port = config['MONGODB_PORT'] _client: AsyncMongoClient = AsyncMongoClient( - f"mongodb://{user}:{password}@{host}:{port}/?directConnection=true&replicaSet=rs0", + f"mongodb://{user}:{password}@{host}:{port}/?directConnection=true&replicaSet=mongo_rs", + uuidRepresentation="standard" ) @classmethod diff --git a/witchaind_doc/C3-Art0002.txt b/witchaind_doc/C3-Art0002.txt deleted file mode 100644 index 748a9f1..0000000 --- a/witchaind_doc/C3-Art0002.txt +++ /dev/null @@ -1,71 +0,0 @@ -【 文献号 】1-2432 -【原文出处】出版发行研究 -【原刊地名】京 -【原刊期号】199601 -【原刊页号】40-41 -【分 类 号】Z1 -【分 类 名】出版工作、图书评介 -【 作 者 】王益 -【复印期号】199604 -【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》 -【 正 文 】 - 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo -f Book Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林 -斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199 -0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的 -基本原则仍是正确的,因而出版时基本上没有修改。这本书是作者长期实践经验的总结,他也从管理科学和财会科学书籍中吸取了营养,并听取 -了其他出版家的意见,它是写给出版社的社长们读的,也是写给出版社所有的工作人员读的。有关出版工作的方方面面,它都涉及到了。他对各 
-项工作的甜酸苦辣好像都有切身的体会,他了解其中的主要矛盾和麻烦,并对如何解决这些问题提出了很好的建议。他看问题全面、客观,立论 -公正,处处迸发出智慧的火花。凡读过这本书的人,都会感到得益。美国《出版商周刊》曾在书评中把它誉为“出版业经营管理方面不可缺少的 -有说服力的研究著作”。 - 本书从论述出版工作中的理性和非理性开始,作者认为,出版社的经营管理者是一个有理性的人,在理性的环境中与有理性的人们一道工作 -,追求可能是复杂的但至少可以明确表示的目标,而整个出版活动,又沉浸在非理性的大海中。经营管理必须把非理性因素也考虑进去,而不能 -企图将其纳入既定秩序的框架之内,对非理性因素,要在内部和外部和它生活、工作在一起,参与进去,并且试图理解它,甚至促进它、鼓励它 -、批评它,把它和出版社的健康现象联系起来。作者认为,非理性也是一种重要的动力,许多培育出版业成长壮大的创造力来自非理性,来自作 -者的下意识思想的活动和要求;我们称之为市场的读者群同样也被他们特有的下意识的思想和要求所驱动。这些观点,对我们来说,非常陌生, -但可以姑妄听之,以便开阔我们的思路。作者在随后部分中论及的出版工作中的一些原则问题,例如出版的文化功能和企业经营的关系,出版业 -与其他行业的异同,盈利性出版社与非盈利出版社的区别,社会效益与经济效益的关系,质量与数量的关系,出版社与作者的关系,出版社中领 -导与被领导的关系,出版社中部门与部门之间的沟通与协作等,其观点倒是我们很熟悉而且容易理解的;甚至有些观点与我们的看法是相似的。 - 出版社的经营管理相当复杂,既有生产,又有销售,既有精神生产,又有物质生产,而且精神生产还是主要的。要熟悉出版社的全盘业务很 -不容易,但作者做到了这一点。他出身于编辑,认为编辑工作也在经营管理的范围之内,但在本书中,他对编辑工作却着墨不多。他强调编辑工 -作的重要性,认为出版史是出版了杰出的书的杰出出版社的历史;出版社所以出名,是因为出版了杰出的著作,而不是由于经营技巧的高明,当 -然他并不忽视经营管理的重要性,本书毕竟绝大部分篇幅是谈生产、销售、人事、财务等方面的事情。 - 作者精通出版社的所有各项业务,并不是洞察出版社所有各项业务的细节,而是对各项业务都有深刻的了解,能够进行十分精辟的分析。他 -注意出书的系统,认为出版社的全部出版物应该实际上是一套或若干套丛书。出书要十分注意质量。质量第一,但并非质量是唯一。有些书即使 -是能赚钱的好书,但如果不符合出版社出书的兴趣、品位和专业范围,也不应该出版,以免有损于出版社的形象。编辑要时刻想到是为作者出书 -而不是为自己出书。编辑在作者面前代表出版社但要注意到自己不是出版社的老板。美术设计必须在经济实用的约束下去发掘美的特性,一本精 -美图书的特性就在于它能体现书的主题和作者的意向。设计人是应该懂得图书不过是他设计的一个对象,书是给读者阅读的,读者的主要兴趣在 -书的内容。生产部(我们一般称为出版部)主要职责在于恰当处理质量、进度、成本三个相互冲突的目标,他们只提好、快、省,不提多、快、 -好、省。他们认为追逐品种多,必然会降低质量,出版社不论大小,都要编制出版日程表,设专人监督出版工作的进度。许多出版社的重印周期 -是两年或两年以内,一般不会生产超过三年以上的供应量,专业书和学术著作出版社不在此例,必须高一些。每个出版社都应结合自己的特点, -建立自己的发行渠道。销售量的变化,与推销工作的关系很密切,每达到一个新的销售水平,必须支出相应的推销费用。推销工作达到一定程度 -,应考虑支出的推销费用能否达到增加销货的目的。定价是由市场决定的,参考市场情况由出版社规定定价,定价与成本有一定联系,但在市场 -经济条件下,不存在简单的根据成本规定定价的公式。定价高也能卖掉一定数量的书,取得一定的利润;降低定价,到一定程度,也会饱和,未 -必能卖掉更多的书,甚至还会遭致亏损。出版社是盈是亏,应该是可以计算的,并有客观标准的,但也可以有主观成分。盈亏决定于资产和负债 -,但某些资产的实际价值是估计的。决算表所反映的情况,不一定真实,可能是虚亏实盈,也可能虚盈实亏。出版社要注意现金流通,经常在手 -头掌握一定的现金,否则即使是赚钱的出版社,如果周转不灵,也会带来极大的麻烦,但手头积攒太多的现金又是不智的。 - 本书提供了五种不同类型的出版社的典型的损益表,用百分比说明了成本、各种经营费用、毛利、税负、纯利润等之间的比例关系,还开列 -了一本具体的书的经营计划表,用金额说明了实际经营情况,使我们获得了明确而清晰的信息,用来与我们的经营情况进行对比,是非常有价值 -的。书中提出了5 个评估出版社财务情况是否健康的比率(流动资产:流动负债,速兑资产:短期债务,年净销售额:库存额,周转资本:库存 
-额,年净销售额:应收帐款额),作为最低限度要求的指标或危险信号,也极有参考价值,本书附录的20多种报表,介绍了一个管理健全的出版 -社,应该有哪些在部门与部门之间沟通信息的工具,很有用。 - 本书用了相当大的篇幅(约1/5),用了许多公式、方程式、 图表,试图科学地解决最佳印数和最佳定价这两个重要而棘手的问题。但只能 -说是一次有益的尝试,是一项有意义的研究成果,其实用价值还是有限的。书中开出的公式以年销售率(第一年销售多少册,以后每年能各销售 -多少册)为基础,销售率只能估计而不能计算出来(教科书和某些有明确对象的专业用书或学习用书例外),而估计很难准确,因为需求充满不 -确定因素。这部分技术性很强,对于方程式和图解法不熟悉的读者来说,要读懂它有一定困难,如果翻译不准确且排印有错误则更为困难。实在 -读不懂的话,可以略去不读,或者只注意作者提出的一些结论性的意见,了解各种因素之间的关系就行了。否则花费了大量时间,而得不到实际 -效果。对我国出版社来讲,当务之急是建立销售记录(最好利用计算机),掌握各类图书历年的销售情况。这是科学管理必不可少的工具,有了 -它,才谈得上研究最佳印数和最佳定价。 - 本书第五章最后一节《新技术的影响》和《新技术》,描绘了电子计算机技术高度发展和广泛应用后的出版业的景象。设想分两步走。第一 -步,出版社利用质量好效率高的自动复印机印书,一次即使印一册也合算,使出版社不以按订单印书(按需生产)。今天收订单,明天就发货。 -没有存货,也用不着计算最佳印数。没有积压,也不会脱销,因而也没有风险,这是很理想的。第二步,到那时,高产高效而价钱又便宜的家用 -电传复印机已像电视机那样普及,读者足不出户,在家中按一下电钮,就可以买到所需要的书,像现在不必去电影院在家中就可以看电影一样。 -出书难、买书难、卖书难的问题彻底解决了。作者预言25年后可能出现这种情况,现已到期,实际上并未实现。现在的情况仍和25年以前一样, -有些读者不愿购买某些学术杂志和专题论文集了,因为到图书馆去复印他所需要的文章要便宜得多。书中的这一部分还是饶有兴趣的,读者如果 -急于了解详细内容,不妨倒过来读,从最后一章读起。 - (本文作者单位:新闻出版署) - (本文责编:文依)* - - - - -- Gitee From 8a777797a6ba792053f6ad06a22b2f88ebe34e51 Mon Sep 17 00:00:00 2001 From: zxstty Date: Wed, 14 May 2025 12:07:25 +0800 Subject: [PATCH 06/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E4=BA=A7?= =?UTF-8?q?=E5=BA=93=E5=AF=BC=E5=87=BA=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/app.py | 7 ++- .../base/task/worker/acc_testing_worker.py | 12 ++--- .../base/task/worker/export_dataset_worker.py | 8 ++-- .../worker/export_knowledge_base_worker.py | 30 +++++++------ .../base/task/worker/import_dataset_worker.py | 4 +- .../worker/import_knowledge_base_worker.py | 2 +- .../base/task/worker/parse_document_worker.py | 44 ++++++++++++------- .../apps/service/acc_testing_service.py | 4 +- data_chain/apps/service/dataset_service.py | 4 +- data_chain/apps/service/document_service.py | 4 +- .../apps/service/knwoledge_base_service.py | 4 +- data_chain/apps/service/session_service.py | 18 ++++++-- data_chain/apps/service/task_queue_service.py | 21 
++++++--- data_chain/entities/enum.py | 1 + data_chain/manager/task_manager.py | 6 +-- data_chain/manager/task_queue_mamanger.py | 13 ++++++ data_chain/parser/handler/base_parser.py | 1 - data_chain/stores/database/database.py | 18 ++++---- 18 files changed, 126 insertions(+), 75 deletions(-) diff --git a/data_chain/apps/app.py b/data_chain/apps/app.py index 4b8370a..9aa6cd9 100644 --- a/data_chain/apps/app.py +++ b/data_chain/apps/app.py @@ -5,6 +5,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler import uvicorn import fastapi import os +import shutil from data_chain.entities.common import actions, DEFAULt_DOC_TYPE_ID from data_chain.apps.router import ( team, @@ -73,6 +74,7 @@ async def startup_event(): scheduler.add_job(TaskQueueService.handle_tasks, 'interval', seconds=5) scheduler.start() + async def add_acitons(): for action in actions: action_entity = ActionEntity( @@ -102,8 +104,9 @@ async def init_path(): TESTING_REPORT_PATH_IN_OS ] for path in paths: - if not os.path.exists(path): - os.makedirs(path) + if os.path.exists(path): + shutil.rmtree(path) + os.makedirs(path) async def configure(): diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py b/data_chain/apps/base/task/worker/acc_testing_worker.py index 398ae03..c7d7d2c 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -67,8 +67,8 @@ class TestingWorker(BaseWorker): "ave_jac": -1, }) await MinIO.delete_object( - bucket_name=TESTING_REPORT_PATH_IN_MINIO, - object_name=str(testing_id) + TESTING_REPORT_PATH_IN_MINIO, + str(testing_id) ) return task_entity.id @@ -95,8 +95,8 @@ class TestingWorker(BaseWorker): }) await TestCaseManager.update_test_case_by_testing_id(task_entity.op_id, {"status": TestCaseStatus.DELETED.value}) await MinIO.delete_object( - bucket_name=TESTING_REPORT_PATH_IN_MINIO, - object_name=str(task_entity.op_id) + TESTING_REPORT_PATH_IN_MINIO, + str(task_entity.op_id) ) await 
TestingManager.update_testing_by_testing_id(task_entity.op_id, { "ave_score": -1, @@ -436,8 +436,8 @@ class TestingWorker(BaseWorker): if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: await TestCaseManager.update_test_case_by_testing_id(task_entity.op_id, {"status": TestCaseStatus.DELETED.value}) await MinIO.delete_object( - bucket_name=TESTING_REPORT_PATH_IN_MINIO, - object_name=str(task_entity.op_id) + TESTING_REPORT_PATH_IN_MINIO, + str(task_entity.op_id) ) await TestingManager.update_testing_by_testing_id(task_entity.op_id, { "ave_score": -1, diff --git a/data_chain/apps/base/task/worker/export_dataset_worker.py b/data_chain/apps/base/task/worker/export_dataset_worker.py index 6b3b5d9..4264165 100644 --- a/data_chain/apps/base/task/worker/export_dataset_worker.py +++ b/data_chain/apps/base/task/worker/export_dataset_worker.py @@ -66,8 +66,8 @@ class ExportDataSetWorker(BaseWorker): if os.path.exists(tmp_path): shutil.rmtree(tmp_path) await MinIO.delete_object( - bucket_name=EXPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_entity.id) + EXPORT_DATASET_PATH_IN_MINIO, + str(task_entity.id) ) if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']: await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.PENDING.value}) @@ -201,7 +201,7 @@ class ExportDataSetWorker(BaseWorker): return None if task_entity.status == TaskStatus.CANCLED or TaskStatus.FAILED.value: await MinIO.delete_object( - bucket_name=EXPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_entity.op_id) + EXPORT_DATASET_PATH_IN_MINIO, + str(task_entity.op_id) ) return task_id diff --git a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py index 6945aec..0dfb6fb 100644 --- a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py +++ 
b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py @@ -47,8 +47,8 @@ class ExportKnowledgeBaseWorker(BaseWorker): status=TaskStatus.PENDING.value) task_entity = await TaskManager.add_task(task_entity) await MinIO.delete_object( - bucket_name=EXPORT_KB_PATH_IN_MINIO, - object_name=str(task_entity.id) + EXPORT_KB_PATH_IN_MINIO, + str(task_entity.id) ) return task_entity.id @@ -64,8 +64,8 @@ class ExportKnowledgeBaseWorker(BaseWorker): if os.path.exists(tmp_path): shutil.rmtree(tmp_path) await MinIO.delete_object( - bucket_name=EXPORT_KB_PATH_IN_MINIO, - object_name=str(task_entity.id) + EXPORT_KB_PATH_IN_MINIO, + str(task_entity.id) ) if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']: await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.PENDING.value}) @@ -97,7 +97,7 @@ class ExportKnowledgeBaseWorker(BaseWorker): shutil.rmtree(tmp_path) os.mkdir(tmp_path) source_path = os.path.join(tmp_path, "source") - target_path = os.path.join(tmp_path, f"{task_id}.zip") + target_path = os.path.join(tmp_path, f"zip") os.mkdir(source_path) os.mkdir(target_path) doc_config_path = os.path.join(source_path, "doc_config") @@ -123,7 +123,7 @@ class ExportKnowledgeBaseWorker(BaseWorker): } doc_type_entities = await KnowledgeBaseManager.list_doc_types_by_kb_id(kb_id) for doc_type_entity in doc_type_entities: - kb_dict["doc_types"].append({"id": doc_type_entity.id, "name": doc_type_entity.name}) + kb_dict["doc_types"].append({"id": str(doc_type_entity.id), "name": doc_type_entity.name}) yaml_path = os.path.join(source_path, "kb_config.yaml") with open(yaml_path, "w", encoding="utf-8", errors='ignore') as f: yaml.dump(kb_dict, f, allow_unicode=True) @@ -139,13 +139,13 @@ class ExportKnowledgeBaseWorker(BaseWorker): "size": doc_entity.size, "parse_method": doc_entity.parse_method, "chunk_size": doc_entity.chunk_size, - "type_id": doc_entity.type_id, + "type_id": str(doc_entity.type_id), "enabled": 
doc_entity.enabled, } + print(doc_dict) yaml_path = os.path.join(doc_config_path, f"{doc_entity.id}.yaml") with open(yaml_path, "w", encoding="utf-8", errors='ignore') as f: yaml.dump(doc_dict, f, allow_unicode=True) - pass @staticmethod async def download_document_from_minio(doc_config_path: str, kb_id: uuid.UUID) -> None: @@ -156,14 +156,16 @@ class ExportKnowledgeBaseWorker(BaseWorker): await MinIO.download_object(DOC_PATH_IN_MINIO, str(doc_entity.id), local_path) @staticmethod - async def zip_config_and_document(source_path: str, target_path: str) -> None: + async def zip_config_and_document(source_path: str, target_path: str, task_id: uuid.UUID) -> None: '''压缩配置文件和文档''' - await ZipHandler.zip_dir(source_path, target_path) + zip_path = os.path.join(target_path, f"{task_id}.zip") + await ZipHandler.zip_dir(source_path, zip_path) + return zip_path @staticmethod - async def upload_zip_to_minio(target_path: str, task_id: uuid.UUID) -> None: + async def upload_zip_to_minio(zip_path: str, task_id: uuid.UUID) -> None: '''上传压缩包到minio''' - await MinIO.put_object(EXPORT_KB_PATH_IN_MINIO, str(task_id), target_path) + await MinIO.put_object(EXPORT_KB_PATH_IN_MINIO, str(task_id), zip_path) @staticmethod async def run(task_id: uuid.UUID) -> None: @@ -189,10 +191,10 @@ class ExportKnowledgeBaseWorker(BaseWorker): await ExportKnowledgeBaseWorker.download_document_from_minio(doc_download_path, task_entity.op_id) current_stage += 1 await ExportKnowledgeBaseWorker.report(task_id, "下载文档", current_stage, stage_cnt) - await ExportKnowledgeBaseWorker.zip_config_and_document(source_path, target_path) + zip_path = await ExportKnowledgeBaseWorker.zip_config_and_document(source_path, target_path, task_id) current_stage += 1 await ExportKnowledgeBaseWorker.report(task_id, "压缩配置文件和文档", current_stage, stage_cnt) - await ExportKnowledgeBaseWorker.upload_zip_to_minio(target_path, task_id) + await ExportKnowledgeBaseWorker.upload_zip_to_minio(zip_path, task_id) current_stage += 1 await 
ExportKnowledgeBaseWorker.report(task_id, "上传压缩包到minio", current_stage, stage_cnt) await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value)) diff --git a/data_chain/apps/base/task/worker/import_dataset_worker.py b/data_chain/apps/base/task/worker/import_dataset_worker.py index 6ecf2b6..3e63c7e 100644 --- a/data_chain/apps/base/task/worker/import_dataset_worker.py +++ b/data_chain/apps/base/task/worker/import_dataset_worker.py @@ -272,7 +272,7 @@ class ImportDataSetWorker(BaseWorker): if task_entity.status == TaskStatus.CANCLED or TaskStatus.FAILED.value: await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.DELETED.value}) await MinIO.delete_object( - bucket_name=IMPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_entity.op_id) + IMPORT_DATASET_PATH_IN_MINIO, + str(task_entity.op_id) ) return task_id diff --git a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py index 2198bb3..d950e49 100644 --- a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py @@ -60,7 +60,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.PENDING.value}) return True else: - await MinIO.delete_object(IMPORT_KB_PATH_IN_OS, str(task_id)) + await MinIO.delete_object(IMPORT_KB_PATH_IN_OS, str(task_entity.op_id)) await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.DELETED.value}) return False diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index 59b5118..0e6273c 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -113,7 +113,6 @@ class 
ParseDocumentWorker(BaseWorker): logging.exception(err) raise Exception(err) file_path = os.path.join(tmp_path, str(doc_id)+'.'+doc_entity.extension) - print(file_path) await MinIO.download_object( DOC_PATH_IN_MINIO, str(doc_entity.id), @@ -137,28 +136,43 @@ class ParseDocumentWorker(BaseWorker): nodes.append(node) parse_result.nodes = nodes if doc_entity.parse_method == ParseMethod.QA: - for node in parse_result.nodes: - try: - if isinstance(node.content, list): + if doc_entity.extension == 'xlsx' or doc_entity.extension == 'csv': + for node in parse_result.nodes: + node.type = ChunkType.QA + try: question = '' if len(node.content) > 0: question = str(node.content[0]) answer = '' if len(node.content) > 1: answer = str(node.content[1]) - elif isinstance(node.content, dict): - question = node.content.get('question', '') - answer = node.content.get('answer', '') - else: + except Exception as e: question = '' answer = '' - except Exception as e: - question = '' - answer = '' - err = f"[ParseDocumentWorker] 解析问题和答案失败,doc_id: {doc_entity.id}, error: {e}" - logging.exception(err) - node.text_feature = question - node.content = 'question: ' + question + '\n' + 'answer: ' + answer + warning = f"[ParseDocumentWorker] 解析问题和答案失败,doc_id: {doc_entity.id}, error: {e}" + logging.warning(warning) + node.text_feature = question + node.content = 'question: ' + question + '\n' + 'answer: ' + answer + elif doc_entity.extension == 'json' or doc_entity.extension == 'yaml': + qa_list = parse_result.nodes[0].content + parse_result.nodes = [] + for qa in qa_list: + question = qa.get('question') + answer = qa.get('answer') + if question is None or answer is None: + warning = f"[ParseDocumentWorker] 解析问题和答案失败,doc_id: {doc_entity.id}, error: {e}" + logging.warning(warning) + continue + node = ParseNode( + id=uuid.uuid4(), + lv=0, + parse_topology_type=ChunkParseTopology.GERNERAL, + text_feature=question, + content='question: ' + question + '\n' + 'answer: ' + answer, + type=ChunkType.QA, + 
link_nodes=[] + ) + parse_result.nodes.append(node) else: for node in parse_result.nodes: if isinstance(node.content, list) or isinstance(node.content, dict): diff --git a/data_chain/apps/service/acc_testing_service.py b/data_chain/apps/service/acc_testing_service.py index 4008f2a..46e30c2 100644 --- a/data_chain/apps/service/acc_testing_service.py +++ b/data_chain/apps/service/acc_testing_service.py @@ -131,8 +131,8 @@ class TestingService: if not task_entity: raise Exception("任务不存在") download_url = MinIO.generate_download_link( - bucket_name=IMPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_entity.task_id), + IMPORT_DATASET_PATH_IN_MINIO, + str(task_entity.task_id), ) return download_url except Exception as e: diff --git a/data_chain/apps/service/dataset_service.py b/data_chain/apps/service/dataset_service.py index 456f183..da397ec 100644 --- a/data_chain/apps/service/dataset_service.py +++ b/data_chain/apps/service/dataset_service.py @@ -133,8 +133,8 @@ class DataSetService: """生成数据集下载链接""" try: download_url = await MinIO.generate_download_link( - bucket_name=IMPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_id), + IMPORT_DATASET_PATH_IN_MINIO, + str(task_id), ) return download_url except Exception as e: diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 293a9b9..8f9caa3 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -86,8 +86,8 @@ class DocumentService: """生成文档下载链接""" try: download_url = await MinIO.generate_download_link( - bucket_name=DOC_PATH_IN_MINIO, - file_name=str(doc_id)) + DOC_PATH_IN_MINIO, + str(doc_id)) return download_url except Exception as e: err = "生成文档下载链接失败" diff --git a/data_chain/apps/service/knwoledge_base_service.py b/data_chain/apps/service/knwoledge_base_service.py index 874a716..0e3b640 100644 --- a/data_chain/apps/service/knwoledge_base_service.py +++ b/data_chain/apps/service/knwoledge_base_service.py @@ 
-145,8 +145,8 @@ class KnowledgeBaseService: try: # 获取知识库 download_link = await MinIO.generate_download_link( - bucket_name=EXPORT_KB_PATH_IN_MINIO, - object_name=f"{task_id}.zip", + EXPORT_KB_PATH_IN_MINIO, + str(task_id), ) return download_link except Exception as e: diff --git a/data_chain/apps/service/session_service.py b/data_chain/apps/service/session_service.py index 1e00568..0f0c959 100644 --- a/data_chain/apps/service/session_service.py +++ b/data_chain/apps/service/session_service.py @@ -22,11 +22,18 @@ class UserHTTPException(HTTPException): async def verify_user(request: HTTPConnection): """验证用户是否在Session中""" - return if config["DEBUG"]: return try: - session_id = request.cookies["ECSESSION"] + session_id = None + auth_header = request.headers.get("Authorization") + if auth_header and auth_header.startswith("Bearer "): + session_id = auth_header.split(" ", 1)[1] + elif "ECSESSION" in request.cookies: + session_id = request.cookies["ECSESSION"] + if session_id is None: + raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, + retcode=401, rtmsg="Authentication Error.", data="") except: raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, retcode=401, rtmsg="Authentication Error.", data="") @@ -43,7 +50,12 @@ async def get_user_sub(request: HTTPConnection) -> uuid: user_sub = "admin" else: try: - session_id = request.cookies["ECSESSION"] + session_id = None + auth_header = request.headers.get("Authorization") + if auth_header and auth_header.startswith("Bearer "): + session_id = auth_header.split(" ", 1)[1] + elif "ECSESSION" in request.cookies: + session_id = request.cookies["ECSESSION"] except: raise UserHTTPException(status_code=status.HTTP_401_UNAUTHORIZED, retcode=401, rtmsg="Authentication Error.", data="") diff --git a/data_chain/apps/service/task_queue_service.py b/data_chain/apps/service/task_queue_service.py index 3db8b91..4257f52 100644 --- a/data_chain/apps/service/task_queue_service.py +++ 
b/data_chain/apps/service/task_queue_service.py @@ -15,16 +15,23 @@ class TaskQueueService: @staticmethod async def init_task_queue(): - task_entities = await TaskManager.get_task_by_task_status(TaskStatus.RUNNING.value) + task_entities = await TaskManager.list_task_by_task_status(TaskStatus.PENDING.value) + task_entities += await TaskManager.list_task_by_task_status(TaskStatus.RUNNING.value) for task_entity in task_entities: try: - flag = await BaseWorker.reinit(task_entity.id) - if flag: - task = Task(_id=task_entity.id, status=TaskStatus.PENDING.value) - await TaskQueueManager.update_task_by_id(task_entity.id, task) + if task_entity.status == TaskStatus.RUNNING.value: + flag = await BaseWorker.reinit(task_entity.id) + if flag: + task = Task(_id=task_entity.id, status=TaskStatus.PENDING.value) + await TaskQueueManager.update_task_by_id(task_entity.id, task) + else: + await BaseWorker.stop(task_entity.id) + await TaskQueueManager.delete_task_by_id(task_entity.id) else: - await BaseWorker.stop(task_entity.id) - await TaskQueueManager.delete_task_by_id(task_entity.id) + task = await TaskQueueManager.get_task_by_id(task_entity.id) + if task is None: + task = Task(_id=task_entity.id, status=TaskStatus.PENDING.value) + await TaskQueueManager.add_task(task) except Exception as e: warining = f"[TaskQueueService] 初始化任务失败 {e}" logging.warning(warining) diff --git a/data_chain/entities/enum.py b/data_chain/entities/enum.py index 323248a..00faf7e 100644 --- a/data_chain/entities/enum.py +++ b/data_chain/entities/enum.py @@ -105,6 +105,7 @@ class ChunkType(str, Enum): IMAGE = "image" CODE = "code" LINK = "link" + QA = "qa" class ChunkParseTopology(str, Enum): diff --git a/data_chain/manager/task_manager.py b/data_chain/manager/task_manager.py index a408419..dd81c4c 100644 --- a/data_chain/manager/task_manager.py +++ b/data_chain/manager/task_manager.py @@ -54,14 +54,14 @@ class TaskManager(): raise e @staticmethod - async def get_task_by_task_status(task_status: str) -> 
List[TaskEntity]: + async def list_task_by_task_status(task_status: str) -> List[TaskEntity]: """根据任务状态获取任务""" try: async with await DataBase.get_session() as session: stmt = select(TaskEntity).where(TaskEntity.status == task_status) result = await session.execute(stmt) - task_entity = result.scalars().all() - return task_entity + task_entities = result.scalars().all() + return task_entities except Exception as e: err = "获取任务失败" logging.exception("[TaskManager] %s", err) diff --git a/data_chain/manager/task_queue_mamanger.py b/data_chain/manager/task_queue_mamanger.py index 6917b98..8f4db40 100644 --- a/data_chain/manager/task_queue_mamanger.py +++ b/data_chain/manager/task_queue_mamanger.py @@ -48,6 +48,19 @@ class TaskQueueManager(): logging.exception("[TaskQueueManager] %s", err) raise e + @staticmethod + async def get_task_by_id(task_id: uuid.UUID) -> Task: + """根据任务ID获取任务""" + try: + async with MongoDB.get_session() as session: + task_colletion = MongoDB.get_collection('witchiand_task') + task = await task_colletion.find_one({"_id": task_id}, session=session) + return Task(**task) if task else None + except Exception as e: + err = "获取任务失败" + logging.exception("[TaskQueueManager] %s", err) + raise e + @staticmethod async def update_task_by_id(task_id: uuid.UUID, task: Task): """根据任务ID更新任务""" diff --git a/data_chain/parser/handler/base_parser.py b/data_chain/parser/handler/base_parser.py index 72dd27c..44ad627 100644 --- a/data_chain/parser/handler/base_parser.py +++ b/data_chain/parser/handler/base_parser.py @@ -7,7 +7,6 @@ class BaseParser: @staticmethod def find_worker_class(worker_name): subclasses = BaseParser.__subclasses__() - print(subclasses) for subclass in subclasses: if subclass.name == worker_name: return subclass diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index 6a068f1..5eef775 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -404,7 +404,7 @@ class 
QAEntity(Base): id = Column(UUID, default=uuid4, primary_key=True) # 数据id dataset_id = Column(UUID, ForeignKey('dataset.id', ondelete="CASCADE")) # 数据所属数据集id doc_id = Column(UUID) # 数据关联的文档id - doc_name = Column(String) # 数据关联的文档名称 + doc_name = Column(String, default="未知文档") # 数据关联的文档名称 question = Column(String) # 数据的问题 answer = Column(String) # 数据的答案 chunk = Column(String) # 数据的片段 @@ -436,14 +436,14 @@ class TestingEntity(Base): search_method = Column(String, default=SearchMethod.KEYWORD_AND_VECTOR.value) # 测试任务的使用的检索增强模式类型 top_k = Column(Integer, default=5) # 测试任务的检索增强模式的top_k status = Column(String, default=TestingStatus.IDLE.value) # 测试任务的状态 - ave_score = Column(Float) # 测试任务的综合得分 - ave_pre = Column(Float) # 测试任务的平均召回率 - ave_rec = Column(Float) # 测试任务的平均精确率 - ave_fai = Column(Float) # 测试任务的平均忠实值 - ave_rel = Column(Float) # 测试任务的平均可解释性 - ave_lcs = Column(Float) # 测试任务的平均最长公共子序列得分 - ave_leve = Column(Float) # 测试任务的平均编辑距离得分 - ave_jac = Column(Float) # 测试任务的平均杰卡德相似系数 + ave_score = Column(Float, default=-1) # 测试任务的综合得分 + ave_pre = Column(Float, default=-1) # 测试任务的平均召回率 + ave_rec = Column(Float, default=-1) # 测试任务的平均精确率 + ave_fai = Column(Float, default=-1) # 测试任务的平均忠实值 + ave_rel = Column(Float, default=-1) # 测试任务的平均可解释性 + ave_lcs = Column(Float, default=-1) # 测试任务的平均最长公共子序列得分 + ave_leve = Column(Float, default=-1) # 测试任务的平均编辑距离得分 + ave_jac = Column(Float, default=-1) # 测试任务的平均杰卡德相似系数 created_at = Column( TIMESTAMP(timezone=True), nullable=True, -- Gitee From 7bf06b4f29b6795a0ff0b510a367887936505f90 Mon Sep 17 00:00:00 2001 From: zxstty Date: Wed, 14 May 2025 21:56:27 +0800 Subject: [PATCH 07/15] =?UTF-8?q?=E5=AE=8C=E5=96=84=E8=B5=84=E4=BA=A7?= =?UTF-8?q?=E5=BA=93=E5=AF=BC=E5=85=A5=E5=AF=BC=E5=87=BA=E7=9A=84=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/base/convertor.py | 37 +++++---- .../worker/export_knowledge_base_worker.py | 4 +- 
.../task/worker/generate_dataset_worker.py | 38 ++++++--- .../worker/import_knowledge_base_worker.py | 28 ++++--- .../base/task/worker/parse_document_worker.py | 78 ++++++++++++++---- data_chain/apps/base/zip_handler.py | 14 ++-- data_chain/apps/router/chunk.py | 25 ++++-- data_chain/apps/router/dataset.py | 2 +- data_chain/apps/router/document.py | 2 + data_chain/apps/router/knowledge_base.py | 2 +- data_chain/apps/service/chunk_service.py | 22 ++++- .../apps/service/knwoledge_base_service.py | 19 ++++- data_chain/apps/service/task_service.py | 6 +- data_chain/entities/request_data.py | 12 +-- data_chain/entities/response_data.py | 6 ++ data_chain/manager/chunk_manager.py | 30 ++++++- data_chain/manager/knowledge_manager.py | 4 + data_chain/manager/task_manager.py | 13 ++- data_chain/manager/testing_manager.py | 37 ++++++--- data_chain/parser/tools/ocr_tool.py | 18 ++-- data_chain/stores/database/database.py | 1 + openapi.json | 2 +- test.jpeg | Bin 0 -> 56781 bytes 23 files changed, 282 insertions(+), 118 deletions(-) create mode 100644 test.jpeg diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 2c713a1..3bcbee8 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -393,14 +393,29 @@ class Convertor: logging.exception("[Convertor] %s", err) raise e + @staticmethod + async def convert_update_chunk_request_to_dict( + req: UpdateChunkRequest) -> dict: + """将更新分片请求转换为字典""" + try: + req_dict = { + 'text': req.text, + 'enabled': req.enabled + } + return req_dict + except Exception as e: + err = "更新分片请求转换为字典失败" + logging.exception("[Convertor] %s", err) + raise e + @staticmethod async def convert_chunk_entity_to_chunk( chunk_entity: ChunkEntity) -> Chunk: """将chunk实体转换为chunk""" try: chunk = Chunk( - chunk_id=chunk_entity.id, - chunk_type=ChunkType(chunk_entity.type), + chunkId=chunk_entity.id, + chunkType=ChunkType(chunk_entity.type), text=chunk_entity.text ) return chunk @@ -409,21 +424,6 @@ 
class Convertor: logging.exception("[Convertor] %s", err) raise e - @staticmethod - async def convert_update_chunk_request_to_dict( - req: UpdateChunkRequest) -> dict: - """将更新chunk请求转换为字典""" - try: - req_dict = { - 'text': req.text, - 'enabled': req.enabled - } - return req_dict - except Exception as e: - err = "更新chunk请求转换为字典失败" - logging.exception("[Convertor] %s", err) - raise e - @staticmethod async def convert_dataset_entity_to_dataset( dataset_entity: DataSetEntity) -> Dataset: @@ -486,7 +486,8 @@ class Convertor: docName=qa_entity.doc_name, question=qa_entity.question, answer=qa_entity.answer, - chunk=qa_entity.chunk + chunk=qa_entity.chunk, + chunkType=qa_entity.chunk_type, ) return data except Exception as e: diff --git a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py index 0dfb6fb..5995deb 100644 --- a/data_chain/apps/base/task/worker/export_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/export_knowledge_base_worker.py @@ -134,6 +134,7 @@ class ExportKnowledgeBaseWorker(BaseWorker): doc_entities = await DocumentManager.list_all_document_by_kb_id(kb_id) for doc_entity in doc_entities: doc_dict = { + "id": str(doc_entity.id), "name": doc_entity.name, "extension": doc_entity.extension, "size": doc_entity.size, @@ -142,7 +143,6 @@ class ExportKnowledgeBaseWorker(BaseWorker): "type_id": str(doc_entity.type_id), "enabled": doc_entity.enabled, } - print(doc_dict) yaml_path = os.path.join(doc_config_path, f"{doc_entity.id}.yaml") with open(yaml_path, "w", encoding="utf-8", errors='ignore') as f: yaml.dump(doc_dict, f, allow_unicode=True) @@ -152,7 +152,7 @@ class ExportKnowledgeBaseWorker(BaseWorker): '''从minio下载文档''' doc_entities = await DocumentManager.list_all_document_by_kb_id(kb_id) for doc_entity in doc_entities: - local_path = os.path.join(doc_config_path, f"{doc_entity.id}") + local_path = os.path.join(doc_config_path, 
f"{doc_entity.id}.{doc_entity.extension}") await MinIO.download_object(DOC_PATH_IN_MINIO, str(doc_entity.id), local_path) @staticmethod diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index 83c30c0..6036afe 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -24,10 +24,15 @@ from data_chain.stores.minio.minio import MinIO from data_chain.stores.mongodb.mongodb import Task +class Chunk(BaseModel): + text: str + type: str + + class DocChunk(BaseModel): doc_id: uuid.UUID doc_name: str - chunks: list[str] + chunks: list[Chunk] class GenerateDataSetWorker(BaseWorker): @@ -91,7 +96,12 @@ class GenerateDataSetWorker(BaseWorker): doc_chunks = [] for dataset_doc_entity in dataset_doc_entities: chunk_entities = await ChunkManager.list_all_chunk_by_doc_id(dataset_doc_entity.id) - chunks = [chunk_entity.text for chunk_entity in chunk_entities] + chunks = [] + for chunk_entity in chunk_entities: + chunks.append(Chunk( + text=chunk_entity.text, + type=chunk_entity.type + )) doc_chunk = DocChunk( doc_id=dataset_entity.id, doc_name=dataset_entity.name, @@ -109,8 +119,9 @@ class GenerateDataSetWorker(BaseWorker): if chunk_cnt == 0: return [] qa_entities = [] - division = dataset_entity.data_cnt // chunk_cnt - remainder = dataset_entity.data_cnt % chunk_cnt + data_cnt = dataset_entity.data_cnt + division = data_cnt // chunk_cnt + remainder = data_cnt % chunk_cnt index = 0 random.shuffle(doc_chunks) with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f: @@ -119,18 +130,21 @@ class GenerateDataSetWorker(BaseWorker): answer_generate_prompt_template = prompt_dict.get('GENERATE_ANSWER_FROM_QUESTION_AND_CONTENT_PROMPT', '') cal_qa_score_prompt_template = prompt_dict.get('CAL_QA_SCORE_PROMPT', '') dataset_score = 0 - for doc_chunk in doc_chunks: - - for i in range(len(doc_chunk.chunks)): - chunk = 
doc_chunk.chunks[i] + for i in range(len(doc_chunks)): + doc_chunk = doc_chunks[i] + for j in range(len(doc_chunk.chunks)): + chunk = doc_chunk.chunks[j].text if dataset_entity.is_chunk_related: if i > 0: chunk = doc_chunk.chunks[i-1] + chunk if i < len(doc_chunk.chunks) - 1: chunk = chunk + doc_chunk.chunks[i+1] - qa_cnt = division - if index < remainder: - qa_cnt += 1 + qa_cnt = random.randint(0, 2*(division+(index <= remainder))) + if i == len(doc_chunks)-1 and j == len(doc_chunk.chunks)-1: + qa_cnt = data_cnt + qa_cnt = min(qa_cnt, data_cnt) + data_cnt -= qa_cnt + data_cnt = max(data_cnt, 0) qs = [] answer = [] rd = 5 @@ -189,7 +203,7 @@ class GenerateDataSetWorker(BaseWorker): question=q, answer=ans, chunk=chunk, - ) + chunk_type=doc_chunk.chunks[i].type) qa_entities.append(qa_entity) index += 1 if len(qa_entities) > 0: diff --git a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py index d950e49..670ae1b 100644 --- a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py @@ -6,6 +6,7 @@ import yaml from data_chain.apps.base.zip_handler import ZipHandler from data_chain.config.config import config from data_chain.logger.logger import logger as logging +from data_chain.apps.service.task_queue_service import TaskQueueService from data_chain.apps.base.task.worker.base_worker import BaseWorker from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, DocumentStatus from data_chain.entities.common import DEFAULt_DOC_TYPE_ID, IMPORT_KB_PATH_IN_OS, DOC_PATH_IN_MINIO, IMPORT_KB_PATH_IN_MINIO @@ -86,13 +87,10 @@ class ImportKnowledgeBaseWorker(BaseWorker): shutil.rmtree(tmp_path) os.mkdir(tmp_path) source_path = os.path.join(tmp_path, f"{task_id}.zip") - target_path = os.path.join(tmp_path, "source") - os.mkdir(source_path) + target_path = os.path.join(tmp_path, "target") 
os.mkdir(target_path) doc_config_path = os.path.join(target_path, "doc_config") doc_download_path = os.path.join(target_path, "doc_download") - os.mkdir(doc_config_path) - os.mkdir(doc_download_path) return (source_path, target_path, doc_config_path, doc_download_path) @staticmethod @@ -121,6 +119,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): doc_type_entity = await DocumentTypeManager.add_document_type(doc_type_entity) if doc_type_entity: doc_types_old_id_map_to_new_id[doc_type_dict['id']] = doc_type_entity.id + return doc_types_old_id_map_to_new_id @staticmethod async def add_docs_to_kb(kb_id: uuid.UUID, doc_config_path: str, doc_download_path: str, @@ -131,12 +130,15 @@ class ImportKnowledgeBaseWorker(BaseWorker): doc_config_names = os.listdir(doc_config_path) for doc_config_name in doc_config_names: try: - doc_config_path = os.path.join(doc_config_path, doc_config_name) - doc_path = os.path.join(doc_download_path, doc_config_name) + yaml_path = os.path.join(doc_config_path, doc_config_name) + with open(yaml_path, "r", encoding="utf-8") as f: + doc_config = yaml.load(f, Loader=yaml.SafeLoader) + old_doc_id = doc_config["id"] + extension = doc_config["extension"] + doc_name = f"{old_doc_id}.{extension}" + doc_path = os.path.join(doc_download_path, doc_name) if not os.path.exists(doc_path): continue - with open(doc_config_path, "r", encoding="utf-8") as f: - doc_config = yaml.load(f, Loader=yaml.SafeLoader) doc_type_id = doc_types_old_id_map_to_new_id.get(doc_config.get("type_id"), DEFAULt_DOC_TYPE_ID) document_entity = DocumentEntity( team_id=kb_entity.team_id, @@ -160,6 +162,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): logging.exception(err) continue await KnowledgeBaseManager.update_doc_cnt_and_doc_size(kb_id) + return doc_old_id_map_to_new_id @staticmethod async def upload_document_to_minio( @@ -171,8 +174,9 @@ class ImportKnowledgeBaseWorker(BaseWorker): doc_path = os.path.join(doc_download_path, doc_name) if not os.path.exists(doc_path): continue - 
if doc_name in doc_old_id_map_to_new_id.keys(): - await MinIO.put_object(DOC_PATH_IN_MINIO, doc_old_id_map_to_new_id.get(doc_name), doc_path) + old_id = doc_name.split('.')[0] + if old_id in doc_old_id_map_to_new_id.keys(): + await MinIO.put_object(DOC_PATH_IN_MINIO, str(doc_old_id_map_to_new_id.get(old_id)), doc_path) except Exception as e: err = f"[ImportKnowledgeBaseWorker] 上传文档失败,文档路径: {doc_path},错误信息: {e}" logging.exception(err) @@ -183,7 +187,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): '''初始化文档解析任务''' document_entities = await DocumentManager.list_all_document_by_kb_id(kb_id) for document_entity in document_entities: - await BaseWorker.init(TaskType.DOC_PARSE.value, document_entity.id) + await TaskQueueService.init_task(TaskType.DOC_PARSE.value, document_entity.id) @staticmethod async def run(task_id: uuid.UUID) -> None: @@ -207,7 +211,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): await ImportKnowledgeBaseWorker.unzip_config_and_document(source_path, target_path) current_stage += 1 await ImportKnowledgeBaseWorker.report(task_id, "解压zip文件", current_stage, stage_cnt) - doc_types_old_id_map_to_new_id = await ImportKnowledgeBaseWorker.add_doc_types_to_kb(kb_id, doc_config_path) + doc_types_old_id_map_to_new_id = await ImportKnowledgeBaseWorker.add_doc_types_to_kb(kb_id, target_path) current_stage += 1 await ImportKnowledgeBaseWorker.report(task_id, "添加文档类型到知识库", current_stage, stage_cnt) doc_old_id_map_to_new_id = await ImportKnowledgeBaseWorker.add_docs_to_kb(kb_id, doc_config_path, doc_download_path, doc_types_old_id_map_to_new_id) diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index 0e6273c..07cfd2b 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -1,4 +1,5 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+from typing import Any import uuid import os import shutil @@ -126,6 +127,27 @@ class ParseDocumentWorker(BaseWorker): parse_result = await BaseParser.parser(doc_entity.extension, file_path) return parse_result + @staticmethod + async def get_content_from_json(js: Any) -> str: + '''获取json内容''' + if isinstance(js, dict): + content = '' + for key, value in js.items(): + content += str(key) + ': ' + if isinstance(value, (dict, list)): + content += await ParseDocumentWorker.get_content_from_json(value) + else: + content += str(value) + '\n' + return content + elif isinstance(js, list): + for item in js: + content += await ParseDocumentWorker.get_content_from_json(item) + content += ' ' + content += '\n' + return content + else: + return str(js) + @staticmethod async def handle_parse_result(parse_result: ParseResult, doc_entity: DocumentEntity, llm: LLM = None) -> None: '''处理解析结果''' @@ -174,11 +196,15 @@ class ParseDocumentWorker(BaseWorker): ) parse_result.nodes.append(node) else: - for node in parse_result.nodes: - if isinstance(node.content, list) or isinstance(node.content, dict): - node.content = str(node.content) - node.text_feature = str(node.content) - elif isinstance(node.content, str): + if doc_entity.extension == 'xlsx' or doc_entity.extension == 'csv': + for node in parse_result.nodes: + node.content = '|'.join(node.content) + node.text_feature = node.content + elif doc_entity.extension == 'json' or doc_entity.extension == 'yaml': + parse_result.nodes[0].content = await ParseDocumentWorker.get_content_from_json(parse_result.nodes[0].content) + parse_result.nodes[0].text_feature = parse_result.nodes[0].content + else: + for node in parse_result.nodes: if node.type == ChunkType.TEXT or node.type == ChunkType.LINK: node.text_feature = node.content elif node.type == ChunkType.CODE: @@ -186,6 +212,9 @@ class ParseDocumentWorker(BaseWorker): node.text_feature = await TokenTool.get_abstract_by_llm(node.content, llm) if node.text_feature is None: node.text_feature 
= TokenTool.get_top_k_keywords(node.content) + elif node.type == ChunkType.TABLE: + node.content = '|'.join(node.content) + node.text_feature = node.content @staticmethod async def upload_parse_image_to_minio_and_postgres( @@ -262,7 +291,7 @@ class ParseDocumentWorker(BaseWorker): len(nodes) and ( nodes[-1].type != ChunkType.TEXT or TokenTool.get_tokens(nodes[-1].content) + tokens > doc_entity. - chunk_size)) or len(nodes) == 0: + chunk_size)): nodes.append(node) else: nodes[-1].content += node.content @@ -270,10 +299,21 @@ class ParseDocumentWorker(BaseWorker): nodes.append(node) parse_result.nodes = nodes nodes = [] + tmp = '' for node in parse_result.nodes: if node.type == ChunkType.TEXT: sentences = TokenTool.content_to_sentences(node.content) - tmp = '' + new_sentences = [] + for sentence in sentences: + if TokenTool.get_tokens(sentence) > doc_entity.chunk_size: + tmp = sentence + while len(tmp) > 0: + sub_sentence = TokenTool.get_k_tokens_words_from_content(tmp, doc_entity.chunk_size) + new_sentences.append(sub_sentence) + tmp = tmp[len(sub_sentence):] + else: + new_sentences.append(sentence) + sentences = new_sentences for sentence in sentences: if len(tmp) == 0: tmp = sentence @@ -291,16 +331,20 @@ class ParseDocumentWorker(BaseWorker): nodes.append(node) else: tmp += sentence - if len(tmp) > 0: - node = ParseNode( - id=uuid.uuid4(), - lv=node.lv, - parse_topology_type=ChunkParseTopology.GERNERAL, - content=tmp, - type=ChunkType.TEXT, - link_nodes=[] - ) - nodes.append(node) + else: + tmp = '' + nodes.append(node) + if len(tmp) > 0: + node = ParseNode( + id=uuid.uuid4(), + lv=node.lv, + parse_topology_type=ChunkParseTopology.GERNERAL, + text_feature=tmp, + content=tmp, + type=ChunkType.TEXT, + link_nodes=[] + ) + nodes.append(node) parse_result.nodes = nodes @staticmethod diff --git a/data_chain/apps/base/zip_handler.py b/data_chain/apps/base/zip_handler.py index d1a48e5..3285eb8 100644 --- a/data_chain/apps/base/zip_handler.py +++ 
b/data_chain/apps/base/zip_handler.py @@ -10,7 +10,7 @@ from data_chain.logger.logger import logger as logging class ZipHandler(): @staticmethod - def check_zip_file(zip_file_path: str, max_file_num: int = 4096, max_file_size: int = 10 * 1024 * 1024 * 1024) -> None: + def check_zip_file(zip_file_path: str, max_file_num: int = 4096, max_file_size: int = 10 * 1024 * 1024 * 1024) -> bool: '''检查压缩文件的数量和大小''' total_size = 0 try: @@ -18,24 +18,22 @@ class ZipHandler(): if len(to_zip_file.filelist) > max_file_num: err = f"压缩文件{zip_file_path}的数量超过了上限" logging.error("[ZipHandler] %s", err) - raise Exception(err) + return False for file in to_zip_file.filelist: total_size += file.file_size if total_size > max_file_size: err = f"压缩文件{zip_file_path}的尺寸超过了上限" logging.error("[ZipHandler] %s", err) - raise Exception(err) - to_zip_file.namelist() - for member in to_zip_file.infolist(): - to_zip_file.open(member) + return False + return True except zipfile.BadZipFile: err = f"文件 {zip_file_path} 可能不是有效的ZIP文件." 
logging.error("[ZipHandler] %s", err) - raise e + return False except Exception as e: err = f"处理文件 {zip_file_path} 时出错: {e}" logging.error("[ZipHandler] %s", err) - raise e + return False @staticmethod async def zip_dir(start_dir: str, zip_name: str) -> None: diff --git a/data_chain/apps/router/chunk.py b/data_chain/apps/router/chunk.py index f7544f4..f456843 100644 --- a/data_chain/apps/router/chunk.py +++ b/data_chain/apps/router/chunk.py @@ -5,15 +5,16 @@ from typing import Annotated from uuid import UUID from data_chain.entities.request_data import ( ListChunkRequest, + UpdateChunkRequest, SearchChunkRequest, - UpdateChunkRequest ) from data_chain.entities.response_data import ( ListChunkMsg, ListChunkResponse, SearchChunkResponse, - UpdateChunkResponse + UpdateChunkResponse, + UpdateChunkEnabledResponse ) from data_chain.apps.service.router_service import get_route_info from data_chain.apps.service.document_service import DocumentService @@ -43,12 +44,24 @@ async def search_chunks( return SearchChunkResponse(result=search_chunk_msg) -@router.put('', response_model=UpdateChunkResponse, dependencies=[Depends(verify_user)]) +@router.put('/', response_model=UpdateChunkResponse, dependencies=[Depends(verify_user)]) async def update_chunk_by_id(user_sub: Annotated[str, Depends(get_user_sub)], action: Annotated[str, Depends(get_route_info)], - chunk_id: Annotated[UUID, Query(alias="chunkId")], - req: UpdateChunkRequest): + chunk_id: Annotated[list[UUID], Query(alias="chunkId")], + req: Annotated[UpdateChunkRequest, Body()]): if not (await ChunkService.validate_user_action_to_chunk(user_sub, chunk_id, action)): raise Exception("用户没有权限访问该文档的分片") - await ChunkService.update_chunk_by_id(chunk_id, req) + chunk_id = await ChunkService.update_chunk_by_id(chunk_id, req) return UpdateChunkResponse(result=chunk_id) + + +@router.put('/switch', response_model=UpdateChunkEnabledResponse, dependencies=[Depends(verify_user)]) +async def update_chunk_enabled_by_id(user_sub: 
Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], + chunk_ids: Annotated[list[UUID], Body(alias="chunkId")], + enabled: Annotated[bool, Query()]): + for chunk_id in chunk_ids: + if not (await ChunkService.validate_user_action_to_chunk(user_sub, chunk_id, action)): + raise Exception("用户没有权限访问该文档的分片") + chunk_ids = await ChunkService.update_chunks_enabled_by_id(chunk_ids, enabled) + return UpdateChunkEnabledResponse(result=chunk_ids) diff --git a/data_chain/apps/router/dataset.py b/data_chain/apps/router/dataset.py index 167d9cf..105b52f 100644 --- a/data_chain/apps/router/dataset.py +++ b/data_chain/apps/router/dataset.py @@ -42,7 +42,7 @@ async def list_dataset_by_kb_id( ): if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): raise Exception("用户没有权限访问该知识库的数据集") - list_dataset_msg = await DataSetService.list_dataset(req) + list_dataset_msg = await DataSetService.list_dataset_by_kb_id(req) return ListDatasetResponse(result=list_dataset_msg) diff --git a/data_chain/apps/router/document.py b/data_chain/apps/router/document.py index 0f74bf1..3494ea9 100644 --- a/data_chain/apps/router/document.py +++ b/data_chain/apps/router/document.py @@ -115,6 +115,8 @@ async def update_doc_by_doc_id( return UpdateDocumentResponse(result=doc_id) +@router.put('/switch', response_model=UpdateDocumentResponse, dependencies=[Depends(verify_user)]) + @router.delete('', response_model=DeleteDocumentResponse, dependencies=[Depends(verify_user)]) async def delete_docs_by_ids( user_sub: Annotated[str, Depends(get_user_sub)], diff --git a/data_chain/apps/router/knowledge_base.py b/data_chain/apps/router/knowledge_base.py index e03af2d..5484384 100644 --- a/data_chain/apps/router/knowledge_base.py +++ b/data_chain/apps/router/knowledge_base.py @@ -117,7 +117,7 @@ async def import_kbs(user_sub: Annotated[str, Depends(get_user_sub)], kb_packages: list[UploadFile] = File(...)): if not await 
TeamService.validate_user_action_in_team(user_sub, team_id, action): raise Exception("用户没有权限在该团队导入知识库") - kb_import_task_ids = await KnowledgeBaseService.import_kb(user_sub, team_id, kb_packages) + kb_import_task_ids = await KnowledgeBaseService.import_kbs(user_sub, team_id, kb_packages) return ImportKnowledgeBaseResponse(result=kb_import_task_ids) diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 662e5b0..9601197 100644 --- a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -6,8 +6,8 @@ import traceback import os from data_chain.entities.request_data import ( ListChunkRequest, - SearchChunkRequest, UpdateChunkRequest, + SearchChunkRequest, ) from data_chain.entities.response_data import ( Task, @@ -58,6 +58,9 @@ class ChunkService: async def list_chunks_by_document_id(req: ListChunkRequest) -> ListChunkMsg: """根据文档ID列出分片""" try: + doc_entity = await DocumentManager.get_document_by_doc_id(req.doc_id) + if doc_entity.status != DocumentStatus.IDLE.value: + return ListChunkMsg(total=0, chunks=[]) total, chunk_entities = await ChunkManager.list_chunk(req) chunks = [] for chunk_entity in chunk_entities: @@ -124,11 +127,22 @@ class ChunkService: search_chunk_msg.docChunks.append(dc) return search_chunk_msg - async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> bool: + async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> uuid.UUID: try: chunk_dict = await Convertor.convert_update_chunk_request_to_dict(req) - await ChunkManager.update_chunk_by_chunk_id(chunk_id, chunk_dict) - return True + chunk_entity = await ChunkManager.update_chunk_by_chunk_id(chunk_id, chunk_dict) + return chunk_entity.id + except Exception as e: + err = "更新分片失败" + logging.exception("[ChunkService] %s", err) + raise Exception(err) + + async def update_chunks_enabled_by_id(chunk_ids: list[uuid.UUID], enabled: bool) -> list[uuid.UUID]: + try: + chunk_dict = 
{"enabled": enabled} + chunk_entities = await ChunkManager.update_chunk_by_chunk_ids(chunk_ids, chunk_dict) + chunk_ids = [chunk_entity.id for chunk_entity in chunk_entities] + return chunk_ids except Exception as e: err = "更新分片失败" logging.exception("[ChunkService] %s", err) diff --git a/data_chain/apps/service/knwoledge_base_service.py b/data_chain/apps/service/knwoledge_base_service.py index 0e3b640..e1360fb 100644 --- a/data_chain/apps/service/knwoledge_base_service.py +++ b/data_chain/apps/service/knwoledge_base_service.py @@ -3,6 +3,7 @@ import aiofiles import uuid from fastapi import APIRouter, Depends, Query, Body, File, UploadFile import os +import shutil import yaml from data_chain.logger.logger import logger as logging from data_chain.entities.request_data import ( @@ -221,9 +222,12 @@ class KnowledgeBaseService: raise Exception(err) kb_import_task_ids = [] for kb_package in kb_packages: - tmp_path = os.join(IMPORT_KB_PATH_IN_OS, str(uuid.uuid4())) + tmp_path = os.path.join(IMPORT_KB_PATH_IN_OS, str(uuid.uuid4())) zip_file_name = kb_package.filename zip_file_path = os.path.join(tmp_path, zip_file_name) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + os.makedirs(tmp_path) try: async with aiofiles.open(zip_file_path, "wb") as f: content = await kb_package.read() @@ -233,16 +237,25 @@ class KnowledgeBaseService: logging.exception("[KnowledgeBaseService] %s", err) continue if not ZipHandler.check_zip_file(zip_file_path): + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) err = "导入知识库失败,包含文件数量过多或者解压缩之后体积过大" logging.exception("[KnowledgeBaseService] %s", err) continue - if not await ZipHandler.unzip_file(zip_file_path, tmp_path, ['kb_config.yaml']): + try: + await ZipHandler.unzip_file(zip_file_path, tmp_path, ['kb_config.yaml']) + except Exception as e: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) err = "导入知识库失败,解压缩失败" logging.exception("[KnowledgeBaseService] %s", err) continue kb_entity = await 
KnowledgeBaseService.get_kb_entity_from_yaml( user_sub, team_id, os.path.join(tmp_path, 'kb_config.yaml')) + kb_entity = await KnowledgeBaseManager.add_knowledge_base(kb_entity) if kb_entity is None: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) err = "导入知识库失败,获取知识库配置失败" logging.exception("[KnowledgeBaseService] %s", err) continue @@ -254,6 +267,8 @@ class KnowledgeBaseService: except Exception as e: err = "导入知识库失败" logging.exception("[KnowledgeBaseService] %s", err) + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) return kb_import_task_ids @staticmethod diff --git a/data_chain/apps/service/task_service.py b/data_chain/apps/service/task_service.py index 92bbfde..7f88ee2 100644 --- a/data_chain/apps/service/task_service.py +++ b/data_chain/apps/service/task_service.py @@ -37,7 +37,7 @@ class TaskService: logging.exception("[TaskService] %s", err) raise e - async def list_task(req: ListTaskRequest) -> ListTaskMsg: + async def list_task(user_sub: str, req: ListTaskRequest) -> ListTaskMsg: """列出任务""" try: total, task_entities = await TaskManager.list_task(user_sub, req) @@ -82,8 +82,8 @@ class TaskService: user_sub, team_id, task_type) task_ids = [] for task_entity in task_entities: - task_id = await TaskQueueService.stop_task(task_entity.id) - if task_id: + task_id = await TaskQueueService.delete_task(task_entity.id) + if task_id is not None: task_ids.append(task_id) return task_ids except Exception as e: diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index f363e07..f83611d 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -122,19 +122,19 @@ class UpdateDocumentRequest(BaseModel): enabled: bool = Field(default=True, description="文档是否启用") -class UpdateChunkRequest(BaseModel): - text: str = Field(min_length=1, description="分块文本内容") - enabled: bool = Field(default=True, description="分块是否启用") - - class ListChunkRequest(BaseModel): doc_id: uuid.UUID = 
Field(description="文档id", alias="docId") text: Optional[str] = Field(default=None, description="分块文本内容", alias="text") - type: Optional[ChunkType] = Field(default=None, description="分块类型", alias="type") + types: Optional[list[ChunkType]] = Field(default=None, description="分块类型", alias="types") page: int = Field(default=1, description="页码") page_size: int = Field(default=40, description="每页数量", alias="pageSize") +class UpdateChunkRequest(BaseModel): + text: str = Field(default='这是一个默认的分块文本', description="分块文本内容", alias="text") + enabled: bool = Field(default=True, description="分块是否启用") + + class SearchChunkRequest(BaseModel): kb_ids: List[uuid.UUID] = Field(default=[], description="资产id", alias="kbIds") query: str = Field(default='', description="查询内容") diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 09d1ee6..518ab4c 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -296,6 +296,11 @@ class UpdateChunkResponse(ResponseData): result: Optional[uuid.UUID] = Field(default=None, description="分片ID") +class UpdateChunkEnabledResponse(ResponseData): + """PUT /chunk 响应""" + result: list[uuid.UUID] = Field(default=[], description="分片ID列表") + + class DocChunk(BaseModel): """Post /chunk/search 数据结构""" doc_id: uuid.UUID = Field(description="文档ID", alias="docId") @@ -351,6 +356,7 @@ class Data(BaseModel): question: str = Field(description="数据的问题") answer: str = Field(description="数据的答案") chunk: str = Field(description="数据的片段") + chunk_type: ChunkType = Field(description="数据的片段类型", alias="chunkType") class ListDataInDatasetMsg(BaseModel): diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index 2854d03..0a43115 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -99,12 +99,13 @@ class ChunkManager(): stmt = stmt.where(ChunkEntity.doc_id == req.doc_id) if req.text: stmt = 
stmt.where(ChunkEntity.text.ilike(f"%{req.text}%")) - if req.type: - stmt = stmt.where(ChunkEntity.type == req.type) + if req.types: + stmt = stmt.where(ChunkEntity.type.in_([t.value for t in req.types])) count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() + print(total) stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) - stmt = stmt.order_by(ChunkEntity.global_offset.desc()) + stmt = stmt.order_by(ChunkEntity.global_offset) result = await session.execute(stmt) chunk_entities = result.scalars().all() return total, chunk_entities @@ -271,3 +272,26 @@ class ChunkManager(): except Exception as e: err = "根据文档ID更新文档解析结果失败" logging.exception("[ChunkManager] %s", err) + + @staticmethod + async def update_chunk_by_chunk_ids(chunk_ids: List[uuid.UUID], chunk_dict: Dict[str, str]) -> list[ChunkEntity]: + """根据文档ID更新文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + update(ChunkEntity) + .where(ChunkEntity.id.in_(chunk_ids)) + .values(**chunk_dict) + ) + await session.execute(stmt) + await session.commit() + stmt = ( + select(ChunkEntity) + .where(ChunkEntity.id.in_(chunk_ids)) + ) + result = await session.execute(stmt) + chunk_entities = result.scalars().all() + return chunk_entities + except Exception as e: + err = "根据文档ID更新文档解析结果失败" + logging.exception("[ChunkManager] %s", err) diff --git a/data_chain/manager/knowledge_manager.py b/data_chain/manager/knowledge_manager.py index 3ff0573..4bc32a1 100644 --- a/data_chain/manager/knowledge_manager.py +++ b/data_chain/manager/knowledge_manager.py @@ -142,6 +142,10 @@ class KnowledgeBaseManager(): and_(DocumentEntity.kb_id == kb_id, DocumentEntity.status != DocumentStatus.DELETED.value)) result = await session.execute(stmt) doc_cnt, doc_size = result.first() + if doc_cnt is None: + doc_cnt = 0 + if doc_size is None: + doc_size = 0 stmt = update(KnowledgeBaseEntity).where(KnowledgeBaseEntity.id == kb_id).values( 
doc_cnt=doc_cnt, doc_size=doc_size) await session.execute(stmt) diff --git a/data_chain/manager/task_manager.py b/data_chain/manager/task_manager.py index dd81c4c..97cb052 100644 --- a/data_chain/manager/task_manager.py +++ b/data_chain/manager/task_manager.py @@ -108,14 +108,18 @@ class TaskManager(): """列出任务""" try: async with await DataBase.get_session() as session: - stmt = select(TaskEntity).where(and_(TaskEntity.user_id == user_sub, - TaskEntity.status != TaskStatus.DELETED.value)) + stmt = select(TaskEntity).where( + and_( + TaskEntity.user_id == user_sub, + TaskEntity.status != TaskStatus.DELETED.value + ) + ) if req.team_id: stmt = stmt.where(TaskEntity.team_id == req.team_id) if req.task_id: stmt = stmt.where(TaskEntity.id == req.task_id) if req.task_type: - stmt = stmt.where(TaskEntity.task_type == req.task_type.value) + stmt = stmt.where(TaskEntity.type == req.task_type.value) if req.task_status: stmt = stmt.where(TaskEntity.status == req.task_status.value) @@ -123,6 +127,7 @@ class TaskManager(): total = (await session.execute(count_stmt)).scalar() stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) stmt = stmt.order_by(TaskEntity.created_time.desc()) + stmt = stmt.order_by(TaskEntity.id.desc()) result = await session.execute(stmt) task_entities = result.scalars().all() return total, task_entities @@ -139,7 +144,7 @@ class TaskManager(): async with await DataBase.get_session() as session: stmt = select(TaskEntity).where(and_(TaskEntity.user_id == user_sub, TaskEntity.team_id == team_id, - TaskEntity.task_type == task_type.value, + TaskEntity.type == task_type.value, TaskEntity.status != TaskStatus.DELETED.value)) result = await session.execute(stmt) task_entities = result.scalars().all() diff --git a/data_chain/manager/testing_manager.py b/data_chain/manager/testing_manager.py index 86689db..0942f66 100644 --- a/data_chain/manager/testing_manager.py +++ b/data_chain/manager/testing_manager.py @@ -69,31 +69,42 @@ class 
TestingManager(): subq = (select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn')).subquery()) - stmt = ( + # 构建内层查询,先进行排序 + inner_stmt = ( select(TestingEntity.dataset_id) .outerjoin(subq, and_(TestingEntity.id == subq.c.op_id, subq.c.rn == 1)) .outerjoin(DataSetEntity, TestingEntity.dataset_id == DataSetEntity.id) ) - stmt = stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) - stmt = stmt.where(TestingEntity.status != TestingStatus.DELETED.value) + inner_stmt = inner_stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) + inner_stmt = inner_stmt.where(TestingEntity.status != TestingStatus.DELETED.value) if req.kb_id: - stmt = stmt.where(TestingEntity == req.kb_id) + inner_stmt = inner_stmt.where(TestingEntity.kb_id == req.kb_id) # 修正此处的条件 if req.testing_id: - stmt = stmt.where(TestingEntity.id == req.testing_id) + inner_stmt = inner_stmt.where(TestingEntity.id == req.testing_id) if req.testing_name: - stmt = stmt.where(TestingEntity.name.ilike(f"%{req.testing_name}%")) + inner_stmt = inner_stmt.where(TestingEntity.name.ilike(f"%{req.testing_name}%")) if req.llm_id: - stmt = stmt.where(TestingEntity.llm_id == req.llm_id) + inner_stmt = inner_stmt.where(TestingEntity.llm_id == req.llm_id) if req.run_status: - stmt = stmt.where(subq.c.status.in_([status.value for status in req.run_status])) + inner_stmt = inner_stmt.where(subq.c.status.in_([status.value for status in req.run_status])) if req.author_name: - stmt = stmt.where(TestingEntity.author_name.ilike(f"%{req.author_name}%")) - stmt = stmt.distinct(TestingEntity.dataset_id) - count_stmt = select(func.count()).select_from(stmt.subquery()) + inner_stmt = inner_stmt.where(TestingEntity.author_name.ilike(f"%{req.author_name}%")) + + # 按期望的顺序排序 + inner_stmt = inner_stmt.order_by(desc(TestingEntity.created_at)) + inner_stmt = inner_stmt.order_by(asc(TestingEntity.id)) + + # 
将内层查询作为子查询,然后在外层查询中应用DISTINCT + distinct_subq = inner_stmt.subquery() + stmt = select(distinct_subq.c.dataset_id).distinct() + + # 计算总数 + count_stmt = select(func.count()).select_from(inner_stmt.subquery()) total = (await session.execute(count_stmt)).scalar() - stmt = stmt.order_by(desc(TestingEntity.created_at)) - stmt = stmt.order_by(asc(TestingEntity.id)) + + # 应用分页 stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + result = await session.execute(stmt) dataset_ids = result.scalars().all() return total, dataset_ids diff --git a/data_chain/parser/tools/ocr_tool.py b/data_chain/parser/tools/ocr_tool.py index c888225..ac67a7e 100644 --- a/data_chain/parser/tools/ocr_tool.py +++ b/data_chain/parser/tools/ocr_tool.py @@ -1,3 +1,5 @@ +from PIL import Image +import asyncio import yaml from paddleocr import PaddleOCR import numpy as np @@ -22,7 +24,7 @@ class OcrTool: @staticmethod async def ocr_from_image(image: np.ndarray) -> list: try: - ocr_result = await OcrTool.model.ocr(image) + ocr_result = OcrTool.model.ocr(image) if ocr_result is None or ocr_result[0] is None: return None return ocr_result @@ -70,20 +72,26 @@ class OcrTool: except Exception as e: err = f"[OCRTool] OCR增强失败 {e}" logging.exception(err) - return OCRTool.merge_text_from_ocr_result(ocr_result) + return OcrTool.merge_text_from_ocr_result(ocr_result) @staticmethod async def image_to_text(image: np.ndarray, image_related_text: str = '', llm: LLM = None) -> str: try: - ocr_result = await OCRTool.ocr_from_image(image) + ocr_result = await OcrTool.ocr_from_image(image) if ocr_result is None: return '' if llm is None: - text = await OCRTool.merge_text_from_ocr_result(ocr_result) + text = await OcrTool.merge_text_from_ocr_result(ocr_result) else: - text = await OCRTool.enhance_ocr_result(ocr_result, image_related_text, llm) + text = await OcrTool.enhance_ocr_result(ocr_result, image_related_text, llm) return text except Exception as e: err = f"[OCRTool] 图片转文本失败 {e}" 
logging.exception(err) return '' + + +path = './test.jpeg' +image = np.array(Image.open(path)) +result = asyncio.run(OcrTool.ocr_from_image(image)) +print(result) diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index 5eef775..0ec2347 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -408,6 +408,7 @@ class QAEntity(Base): question = Column(String) # 数据的问题 answer = Column(String) # 数据的答案 chunk = Column(String) # 数据的片段 + chunk_type = Column(String, default="未知片段类型") # 数据的片段类型 status = Column(String) created_at = Column( TIMESTAMP(timezone=True), diff --git a/openapi.json b/openapi.json index 0652e76..e3d7b3d 100644 --- a/openapi.json +++ b/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have 
Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search 
Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create 
Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing 
Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg 
Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb 
Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result"
,"description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"}},"type":"object","required":["dataId","docName","question","answer","chunk"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/compon
ents/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","docTask","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid
"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"type":{"anyOf":[{"$ref":"#/components/schemas/ChunkType"},{"type":"null"}],"description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 
数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","minLength":1,"title":"Text","description":"分块文本内容"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","required":["text"],"title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/text":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk_text_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List 
Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task 
Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing 
Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role 
Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg 
Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items
":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","descript
ion":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetNam
e":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file diff --git a/test.jpeg b/test.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..b116153f405865964b06277cd0aeaeeed322b983 GIT binary patch literal 56781 zcmb5UhgVZu6E~a+Qk5oGV;m_GO{o^ zDXEiZPQp}FPMc2?Ky2 zDQWN-HDd@AX)REokw!%GocvZSbmg@?et9@9@V3ywyn8VPwq-{g^BK@;pk*;oxLdg-K<+Q3iM#?)=- zl>$@4+Wo-W@v6YIWl5vMDjsr@_^4DBRj6`lZNgV0t`)4KXk~XxA&j9{0B%BZHCK0{ zx9S@@07<864D#1PSr$C?8@9Fng(<+(?i)V~`hxe@gc^DY;?kY7<1-LyHA)^75uVj~ zQ9;_{O6zea)|bq*hF+0`2ASTXjW0l*sJV8)lHCY8)L&S~%Dha4WK%#{09JtcIWHI@Q|RWVVfq&fnze;5Y?!}o(JwPT1W~qjl~T{wxmW}O2Q>0Hu!kj_ z{Nt%Zv;F5XiQJ(0SBi#s)a5w2PI2|7*cFju+h0l_zA{a{gn;zh9$MHN5O&Ye7em#F zcf_t&5g+#oA+vH?s)q}<2^Z(l5^W^Z=J$jQSfe?PC1E+l$Xsv_{tB$?_AZm_K)%GF zAH<9=K&u>#q>IX=T%#-!44IIQ!24|l)w%+(yy2J^wrqDyOfiWJb6+D(z=1fkU+>}l zM7v^)&`Tok?Wo2sXh(8AuNv}&w5-%t*D6_69{l&%f*a`MC_3%T}moWq9|`^ zr!efZj0~Bl`Oz6F01G~U(nA;4M-tgkLTSi54NYyQmra^%p(489f{MCzU=6?TrELN- z{_4lPH{gT3tNB;$NxUv2MSI=&J*nOUiMW}?HRwbejBn>vaefFp;RK9DZX37(&$Bi? 
zLjdDjW4yPhm>tS8#Y9!oh-NWdlD99rBE5aT9Uj)paHkQ@V~9zIy`Qb&u%;0+cX|SPSk%D2G)FimAM=o)3OYK?)I2!tjx;z z*&SQ5tw=D{5b%i2(!i5%3IIVW!HEe{be%^VEdlxHYA!6vUv2umV5;3G#B&@=NG_#(I>caoY`7hwJbDBA$%nI>-^o03EENZr9JFG#dGWeNWo=jxhoFYpv$3wN<7p^>HbMtR) z-t5&iq2cW@gGWzy(O`p9Yrd28t-GiGd$z)6MoK1v&@SQlvtJ&c4mTG z3fJ9$64HY%+K`H>&Up&+MAd6^dmrF6?Y(dx!sGHnP8ir1x8*Nt@rA7}y46$ex9XxGJ-ZH$&iMkB9uPcBQNJ0HPz3|K&XkKP77+kFBwV0p^+S5cG&9G# z-yCl6Bo6q;n(w$A@t-;1uK#xLkipQ&6h`rT8)lcHxzj`;T6U6Jl#em!1{RY$zEuwI zTrliaoU1B)GkIweG=HroX^#v_wIha#F7{_9|Gg3&w$7J~`HIrD-}YieBuf02jLu>{ zpk-<_Cwsz((^=6jv1c0_6meWJX*wZSCh3}(Vz5;2oH?*Wo+!5zwxVZb$y?UWapPZ} zoO%5gnUoh_)G(lgIrij1RhS;SGVp?)#kZVFym;B2ng}}`K7S_W3vgDU)m2X*{>&*j zco)Q${es%W!ZAw_-n<5xR7|a!RYg&JCO^1)1>s-lqCnA6e#AsG9bCO?a0jUSlSf|y zoL28u_?=!Dj%nai2IW8&?zYkDg;zaywS5--K-`hKZJqrYW8QV9P_~2|7i+8Y323+B zlVuxQfo|~{ymgH%q=y)R7Wv74Qi#hq)?XBzy;x*n3sy6Xb+6qOl0Oxr@%rhV&*Q@q zMWgv+MlVim>F+b9SEg!l#q!7ny^PzqR@2TgW?cX~W?P-5s2gHvHe7sydO++jD zs-i_W_>A`s7MMl-iZVCJ2a{{rws$?;?ZU;`4P%v;+-p1`yfk>0lWL@?g#MjiA?w&~ z_pvC!$MnW5yh-XveNumhp$%|Oj@|GK#(zY#73Vi-&Ea4zbOqQ@j>WUjsF0Re~_~qTiLv2*8r;SNCcXY$x+bz$9K}ah^t>F4TSssn&{s z|C)khJtm|-u82Vtn=k&&reeP{Y7R>CtuA2E85_P&Uj?{B!FHpCs$umfd1%iXeZ*XT z59YdR+?SI&CF1RBalvwZK$$tuB^Xt2k5T(HZR&)@qXrR=K;EZ#f(DK+7Cwl#?Ld4- z3+cbkQ&l+Ep}v$h=|W|w?Fn@HcW(WbApreWeF-u3g`gf_(nYHNAMb%3s2cX08>2lb zKw@S_%ZxG(hMczrzS*%HMOi$Kwp)XyU|Q=YFcJfKL3op#=4Wr%?prH)%TcCFkN$RK zzq)?TtT7oq!;z?nF)Iti^r^!7cyl~aL|$Gw7u@?~#dqjM%z<#p7L6_(i=oP$&H~2oNjW48i2_j*V@)reNQlg<-1;bNe%yKo*(3E3U)NdKWA?h zxS0a}=rTEcb$HS(4#%q>QV5wO?4ueC~u-eqm5Iof22h zqm8D+h%=t%p&@+%h;AVJNy6n11A^mC67s*CG>AOe-OvY7qE5yMG8}Uhpm1M55G#iT zgtf4l9Wv1?8KW!=#=4-MM6a!Sc=KPt2^kYR#Tn9=2&~LlC{fai)O+lc^rCyyjpcCX zK7hL8rq!z~noWNdme$O7rmrRQo6T>SucgQJl zWhNtyYxtCS;JsMnC(~EeoF<7|uWMZ{2l@sNuZUSB*cQy&%4R~dCXh?>#2s6T#aViB zrH2&OPI-M@xw#c;>7n{ZnY03FlTWF9d(wPUhsfxzm@4Kf2(w0*5wGpRCtn8uYky_P z=^vf(S?d62%y_;r&lc1NGj0jbG!1c7(Zh@K?CxVP)T4ER@(gO%V{~z7%=fmsZ3mdp zCCK{z@VO3yvhv0~5G_w@UpITn>3lmtLg>`{Txqv4tYX}4LnDzTF`W*530098%&i-{ 
zLyxY+hD7!`osc*kHE+^X<<~7|c^D;N42iPt%VSy3lOKA z(|Y(XFd`?Bafn#Zt)fUZTTzaZ^PjXW=fB5RRRbS2O@0xy3YJ^-`-J-{yb3dWYtWv4 z9TNhJ1sP-Rh73pyc@JhMwlxe+71x06%ES^pM|?0lkliEvBVnR_6rG>J(g9}Kf){?> zkgl1nT4rDi#?WF$?0mK&VJ+Nqu27TDl5cpw?&=z>ye)Juo|5;tr1R`l_YcNPUMMXz zF4(SDLFPQJ%s%fNkW{_jc6RQ}9r=F(oV^W(!s1BRSKW5Q&3y(Gzq+<64seCvkxS->e_VpR0yNM21Gwq0tG5ZFhoGt_iniG^A{N9XMZ_nf9d(jPjRS|1 zMkw82&&p7J`HyYdL(WCF_RCFlfCk8wIU@<9iCCE^{n2=q`$t>@P<>sGvyqzqaLBtl zoXI$2D?;;kP8%R-)!vVimtW=1zfsrTd=TUC*H90|Vzg`9QjdnH!CUk+#=OKz+lEWF zb7XAG>!aT6uA)~xYC9vojDHxXr4~13^KiHib z2*M|@;8zktG$-_L&|j61#wvzp=LtUTb7X23|JYZ=5FvXSGmfucw#;iBV1&Tz*G?k7 zAdt73@{7no4fj3WJM{%{Z+~xwY*2LVmhnsXx0OSoW@}j84*Tx1MAs-(4R^GUr@Jct z#=zGNXzAr(V|iXXX=4s2N00^c^-$OWxJ zg>&KB;n2IW_n5c);6nnvc(wH?j6g+0H^+J zCs)akSa$;7%|hhC2cE}Xz!Z82M6@4!m2Da7+Zk?%6@LYKsHL8{n=o1FXQBB_dm-F% z3;H%}03O;upXQ;f&18@{wRY!M6 znzCX1V=I$!g_i)?$Mmc>fZOft=mtc$c~KW&bbyatkk;-;k3a9LZ(yg|rK)Nz8@*_j z`ih^h0K9YTMwR$~MPh_urnwQKzJiuwCMe*f*nESFTFUS;@3x(&0Sl^E8d!i_x=C3P zy3;WwiM$nS8>zOAo00spTpHmjFcv^M7Usg@lkfg1mBNS@GJC^yv#xv_iTJFXtnl`u zPN1p&skh24f{$5>36=>jVti}wh*#Ab)H)uf6^7dn7%A_i`>p%6SDj~h&N{5fy)ZGi z0aSpz4mIBfoa{OP_4kta(;Q+y3Qk{`w~b{#s_9hs$*{vid7W=(_ z)RR&e)z1@h5v&mXe4qhqyvsEtDG>O3rK3X4(K{c z#90F{I<7}`o-k*;s~FTOU%63NrTe4oL95KU>7qqInC`7U5~NVV)KhU#?Qbr0jpgxk zxajS|^;QX9>52NJY~ddQMvK9vd~&Xkb3^{QJQ~Q+IdGtj{d#OBzcv&qyBOZAskm5Z z+sKk8P?KPs>>#}cENy7;;$5_RQHEvTiqMl8YlB3W}kW>rAZt6z}}eRTCa z`#2J0e6en2KJEU7=JbprQY~AmmN_PcBvZV@?Op0MWrm_)?6@`+t9rE>*G=tx7QvzYnmWM>Dym_=JxZeI z+85@EvO)o87R-FO-eIR$94CQ%nt)__Lb)`Q{2N05XuaKOc!t`|QXVlpEObOXSrHS= z7T8y5$?wk9XoV$$bd6>4p?<-^*ns-Yak$`8Yw7||&Pia&Q?W}mJ^;K)#8 z-OD0cO*WBRX6l<4Z`ArJCQLF5-%xy-@4FB*@TlUxt8&}64H_ruA*Xfa`U0tIK@0cj zuUuCoy9^S=bOC=AD$C(d&zaV#qFQ=X9adcSV!r^|^onC%aPVy$dgV995%!mj;{qM5 znW5^=<-ot#@Z?aOCD7MET`(K$M@>nGPC_#trcO}5S{&-XVg&w>vItkC#iile0nfQH zI9tg?Kme>q$v}IF*f&Cj(>y0&9-5x!!g8ve42HACf6@O;da@<9J=X9&1ebv5fM+0Ni4mdc|66xP;g%0)A z3ATW3mTl4v%yxJ;z}m{oiTeVp>oa*N^}39nqz*eOvSvr)1fp9w=6yY!8^rYBN{v=U zPevC77l2YkBYaPzm-E=2KN#%2{5HsIP*bImDfO`8g*0PC4 
zqQg0(r`$^JJdjdq>Fm_N>{VQ`CJioBi*b? z$ZiBNmvNLWo^!hh&AsP~mT_sdtLU)5+bh~1YJ0g3lTYZ{2Gt~&49Q6$YA6rcO^5^p(j&?APq~^cUKtyw?i+!0 z%g>zuq@!Rk{Ghm3=(CkZwzgJ2G8t_psF^#q`#h`@dX9MpjTqO`Edsc=Yp#yqv5aVg zXp&^=gs`4tSg%B(h^s4d23+#9&VqMq0d11eGFJFKrynr}FKn-2pR2x%$&e$C2 zq$be0DBK@Ms5){O8}bQpMTg7ZM9Bu_*#a?9(Q8jK59$z*OaWtX3N_&|pb)X2JW5xND!jYe9H$gwTdfHWF2N9@Y+BwYz5)@W*KoT28*3tFB$ zNC@jzU`klJsy&S(RYNW(75SpExDJ8aSU&E(FIrmNEwFdzijI zaYN0AlD%M9MuEO5NpZyzsEKJPy(^IjIFw~?I(B7JtoqT4g$MA*D`FfS#-bN`InsY3 zkQ@hee=_~SZp%^+eJW!3W1-Z^lS&B)uH8l2P{jnTTEzfd&TMRJIIuKJpHbd~@2_O7 zYb76m-W@VC%#Ui7b9P>Ij*69yw~~af4h4R_1gurz=)CFlT@3@3eZ!Qz5mSJg1Job? zQ+X?}^DYmP0ptG4 z`EL2tDYsT>rSfna!}s#e=F2M}4!f7O<(5N%|9x^B@IFXCt6vbQKda|U&(WQWkbYDo z-^r-bZU0C0-DrB~vFzHV!xKLK)u3!jZ^%oceKC=fD@R2%?}lA}iVGP;XIc(s{>swN zI3aT95D7dG7DpFZ?V3q8t~-@EZR5S&(dZ%pp;&}1>>+<3PM69U9jh$xL`Ps_dyt4j-XkO z%7%3I1X6r3yU^*1tW&v`74n2hz+{hsZ6V+q`q`Fw*uxvyISLL@sOB8UD9mF2hL$qj zz%7Vdf08u={`s^h)wuI`YNqfds|9l1%UmKqtX`8oXG;j?erQ!RN$?Ag282ZW*C>@Q z+6TEAp@@yj%Hd&s++($DXe8qA3g_&3U0}~EV1T|?wE`~7&E^0!ZkK2p*sam$Ca>RP z4~ck<*u=8g-t;`~hs{uR2d)I-mBG#x_n3kju1K?n^R1j{+x#Ya_79kG5~)7yKzjK< zLbqgnhbWOUAvjph5|C4dB*QGKG;)>S5N7yeD!=H?EW#NvAK;cgx=DUZIpe;GmBHxN+x-&x&=& zZ$H_!<~g~NZeKVN*qmWu|AUmJ&6|9pvMsc0;eE0+UUB^%T^HLnzIJSmGkgFt3u$X? 
zyb8F?SCM0?gMTstH@WF?UF6+ZYB@vB&Lcvib|rI(B_p0tD`qDIjMjX1H7p8MnLD#E zMj!)eV}m3VyM8Wqx(n#NtJnTjG&pM^A85UR*SE}(O0_O3-GCGkZF0DM(%!?3TfRfa z+mnP(`m$b4P8NTR1M8KhFb#P=9b7#xM&KtGo1{BgI$_<$V0#Px^qca3tmklP1q)<= zcR}iz={l?>A@OQmue68nL8;>(`HOE4ffuKpUI(S^>4AKzb7(DO$1^V^8@a|ub}<`# z*Szej2_8VzE~&xcaW~cDG0(Ykw!oIPyB@6%)AoMQ1HX!z&7^A!HFwHyZ_kCcB>@Vn zU$fl5*_|rTB^@h#nSg1lz9q#uxoQLGf~jSW9gmV-swKcL-&8QPc}8Ufv}wWOvdBW@8un;zJ$oysAp-#;>hKblp_|A^T&Jah9|KrZ`#+7 zwE7&k(zg?Eb|N+f*cjRZ5WqfL zona1)NRm3&H&`N${8;8SFOo#5$c!X$FDF%X=T(3%OE)OL*@#G**-2; zwmv6x^IgSIp&m@`Yg~D_K%{A^tudk686D!4Ms9zJ&4nfV;9EM&J4_qRpnZeG)n!xp zGrcb!4QVlb5-q&6Du?+4K;qad*qw}EP}X?aIjLV9@(|>n*Nbbzny`hE|H=*$q}dpBmZCYf)GT%gj5uNMxB=I(XPT8UqMQHBsmM^7I;rIzQ6kAE z-Yru_&|g?k#%A1~R<7YH19X5Px=D8Bh_4t_4cK-BGo(nnaeG6JOz-kuh`yltDy_>8 z{_AFCMTT!)%O2e)Cfs5+I4%y{O%G6Nt=^|s^|5pH^zJN&LUH+t-ZoG5+s6+8Nlv6Z z&SuaF6)H9mvv%b0QfwD(X=*J&`9KxAh#-h`=fcPs>gA!;49X$coBQfbZt(iRzkoB5 z5|-%vfnd9D2G@0G-f6QevG7%AkJ$^>i-We&K-%Bb+IPeSYeF}au+SGK$l8*m9L2^t zCj51#H1^G83(?H0M-pkXF$u_#emHGf04J|cuybY5iV?jYgkTkP{2tzDyv(%wCfK3DD@H5z_S*lV;# zktQa36CWJrJN7DwJ2M=e-&Vej>zeReE^U2R<2o+ZD$IWEn&i|fDK+c0f!prx6Q1g0 zcg?ZRKAVaPT5t{jto{i!dU=$`CR+$uU5HZAD=o5h+l`6-kVdNFOd_fklihgqY&BAM8}W~dFoZv23n`+DVeE9cjkCCv!rYO zE@lPRoLcwr+7@az2t&TW(B^X+V+3G@3MbPI`7x^=pfmBC{Zlvly_w?cH^q*kjKpq< zPmOZsw9a(F=AQbkCfhopv;`d`gB{)^IuhmZAq5kS_JnOb(m?>JrOB7cR^tIQ;+$Rb*p;yPwv*nHaLR4tYl>W7$sS6KN_hW62n`WJU~0>U%cxi zBT@T8Af`j@9NP8MU;X1&b%EO7D$=*{@;dNM`<5Hxm%rUe4r~T4@4}mlduj86LcUNL z)poLrA5sqXR*Y1hh&)Ov#vA_e)B7xUDnC&_^w|p3*IT+h=DX7`JC*PBjopWz%mt++ zZwtv?Py@W$(F*TT=QtJuiss^N?-L5* zQHNaMR|)$jlb@mzFI7=fK3|jz$(o&OLO8RRz23S0rE~o;;L0k!M~zUq4N+puSJ1q3 zt#?er)l}<52hkqD=WA8>`$+6}KdYwt84=w4Y<3!P(m5O8&q-|UvPgmQZe`7Zn4L4o zqI}l-K)^lfZKX#?$6b|4z9V8nZPa+OYnrL8 zyjzQL`++^G6>dV1K@ZxB_vj5Q9%l6ho*`9pr~P5F51P%1)B)l_#Xmbmq>o_)e>y&C1_1Rq)*e_e;U7;`QH z@bd-!@;`Ig+O-Kc)oSqK6#=a#mV>1)_MmqL@w*f7Xx_lx?n@B)F50DuKjvpLbnh&nzB0J&WmG-x(P(4KCFsc- z3ej{<^6eG90`^exwh$XR#=N70Bx;7j+B%IkilL40jHni6!X 
zaYE#96Gik+BluqV54}*oM88m|-%Nu46WH3G7v%Wtw5!`9E5v?+3J?692Z;N1W>-sj za-9nLXAbNBPm^&mi`lg#H1KnRR&epsN|p|QWM|htnHSi=Rf5`g&@^dTdqJ<~fB_3! z+wS+Pv|RCk2F^`iY^*`^JZRNLr{w|jB`JplGW!MU5#T6;2N?a$YN7-8; z-&XD%f6gybRpT@wtCW^V1ty5C*pG=(S+X$!#Z)1;KA`rc%{bZ}i7h`Dod79o(nPQF zbJ0XGauLAA>wNlZz(sb(2@TR!*=+5P@z~7QWO-wnMXQ;8&q~w{jPCZwRuNsKSQH~< zK1TTGXuo&D@npur`Ht@snFWa;UwA{P1m*m*9mTKK482RkWz2RB^($9h5%p-T$puPq zLqpExfSyA?QHu2S0ajpV+8=ac7s*^UkJrNT&AZxG(3H7wiVUFgH+85P8q56n>Q2e% zWbWl}WtM@eqR~5npHxZ;#Vd2wFu7j0*2U2Y4gBL4ilpvEUou0(*S!GL9c&RId@@bZ zo=#cg{g9DFKDP2N_60QR{0d4qLNbcYUN#3nZ@GIlu0lv2(+H}mHAfrGYye+e8F#lQ z+s=Rxr9lQGF9iCDAa2V>T$S1v`2M(sA(xo-+%!!ga8C8T;? zz?lK2^E6p?baCA9eV`C!vAgW!#tP|B%)`iVU;1W1{g`6$XV8n!qL-UqW$B?mYH1)k zWP?1n$;G_O%RrcW;MaTWU6OgpG|fvADmOzN&Sj{o7X{=D5o7@!*KQ;1d~PcYVi5u< zO6DCq?KyK8DfL|5s;LWG?BuVJcG+ECJ*AMP+${Kb@)P&Ja>m*^uIr2ea38516)Mzx zqMbrA!Ii?ua8Nb4f9cEt>L{3KfQStTz9vGM*e-4#f1!GjTnOy3u(I+LMV;lVoydp9l zj^u*m{>*N}@N^69oas*4Y9%1n5BbG`ObZ$X3!HrJ$O$GP?{bBCzB3-K_FP3?4pWR) z=crsbe1whVddDLl1?@m8UKico3;$HET+^s((oMY^LuqCg%IJ1)GfaI*s|nIy!OUVm z6>OB=ws$Vf#Vhy=td!KfdFj}vsD_hY-EDV4F>@4Y0{==Jgq=AMJXj&F1y>yGS)WAT_!RG0<_6=i3lV_2 z6}x91=PC)}>km}r%T&SRy}*hjyV9# zclW{A==(;w&V~iI>&kLPM$W(dk)DrBOIyCp1A7Zu_ynB+T;*%^y1xgEOOaZdGXPel zvkuOF1f7@^Ch-#$cIIY+chAVK4^XK*S82ywqlSn%H^)zm{=?-{7C+Hla~#g} zb#FR`;brB$)A~AeIq2SbBjw9w?Nlx15J~=PppGvs--}O=;z)F?2ni%hg?DY3Zl&?k z%xu8yOT>isJa&rq$B-O_8#qwr;RuHk~4p_km&W&LY2@P0yKRgeQfml5Ix(=Cvp zGA%ah@Rt6n^@L#iRVVFja1AF>{NQ-%L}ag6tG9GEkmeVJNMH8f+rKFS%W>aoS!1-x zM}x*RcF(l~7E577t)ju`)i zH1vly`!9B4I6GIi0Sh|bp(`Xpx!{2jJcvz3VeCt6YrfOlG}$!4Q_)1RyaLKE z&gJd58c+MpRw%g7?=F^iGg%V2rXHh_+3-js#TDhz4|QClD|k@}?#x^^K||lJ?1;$P zlj@A<+;unsQSG1sT*9@Y@c`3GpFwt zq>zvzm#7;*mbpwX4zbqG8%}6zh+kQ< z0LpwO`ac?b=@5H*U$tTqQSFnE{!9#}I2b~GpnqFplk!nT^Op3BDDlPWB3rIpex#byT4)3781vCQ zjYKyeXzq-gFEhEpvmL-gFA!8GXXAj_gB=9}zKPT52g6VlNvXY>DZ-5t=r`j z*44knTl~TT2l)I%5v;vMnXfs(hl}GQnGd;Wx6bs?uQQENNJ^mgKdu#W&)qc8CeHg5 z+A*GUKnneO{iV}Jihc5k*ACmZ&FGK?_do^RqT?8a7R6x87sc0Df)V1FgLLJrcT&XpjOk@Zs-hCKp-$$2pgx}tF-k@- 
z!^ z)1B2D&Kv_5zsq7c^}Ve1Rw>YzE6i9&)nxta@ISIw!n_F zGu{oNg5}D0;JC~N!v0YMgnUH|I6Ul5H!6TDaS?w>pAj_gnV~PNyKWoztb!FWBTp`t z5m?a)fsx1&N_~4b3-_xQnK!`5X(0TWFwQtYJ!VRTTVZNWI;e3jy@GFV0_y|3a*Qvh z_j$L2{0Vc6vSyJD1P8;HXlqfQs^+266)|qzDYqPOsD8TjZEyen1$;AZ4OCjFKiRg$ zcj)$Wa}>es%6W@~(>vnN_!%0>tO^^K8_aUWih>(CHDun zOU8L+3X0hlUpCdNeTy0tgVJj5wcK3opU&ukp+H%E8$-*3OR{D6-R)B74?A~pA$gC5 zi0m(**&Pw=%KZS2+wlAj@X%<}516%}8TuTru&S8&6~3w>RWXF&70$cceiFDTC&z%x zwyJ{W^Xj}xQy6i#W}I)%L9l0h>->33fev%k(5xNzf?Xl*WMdS}aiz!p#lsxSONCu? z(5!kGcR?Sjw<30oQ(>p0pOYf5#FyB%=}rd`#xgB-J?kjy@k2JEF@@cuaH>0k-d=sqKg_^jMWXARUPi}4v$AO{;FbTooY}PNE&d&6$2qf z;a(A}D_1$xUhw)@UPf(cA#qH?pn7a|d;@e~R(rF()Rz_O)HJ!aYpNp^WINPP1T-|- z#}1Pjv&7N@7p}<|i3ASOIj+4+C7+J+^;v`7$FVdUsH@6Zm*|b!g=43#UPnud4}!Ke zB6valv4B8c$$w7@?L-o0d6!3+=yg3p!FRG3LO)$d3&WtD-2sM52ax%* zxIRYBPbBV63#q~M@cM28rIb9Vq{zMgpu567LYzqtGi^C6N5M@ zxpF7Js)I#^6!B3BW;?4xkHUw!AM&s8EnqkT2^~E5M9BJvdl+^b_;eJS@QXFFQZ^q$=TY^c?#wA%3M&FdeF9R z?cYJ@yicgd8F>_Y92M+(=2>$7N`D8Er%LJX7vgKqgq6r6t4)%_zDkhyZ%d=a`TCvP zY!2k-5VMmsvjyais5!(?db4Z61x~GQYEMm5{+2c<1=Q(y1?cdM7GwY)U=8JR?%Gzy zoZ)_!d{3O2#vYYdGr zW${Ac`yXyk{r`LN4doxt(~zI>8PDn`ir-zWdl+I;`0ec0yT#txnQ!Z(RzLFpQvYw{ z>Gyw0I*tCPcT_-lQs{9Z^Y=GH#qCqH28jnr@PKW{&M zsQ01u%=h2Yiqq3IvwaS#AOG4te&C_~{?EJpC+6bnv{Xv>oBr!&yq5G*?W1zO`xn}k zyH77X(#pwuZRnlqT5Vrj6Uj}&8)rsFJ@x!$TPT&SDP-oJl3%$FXr9VAOy!X0Q$x$5yqd|4a_X&{p%tFW%`aU$rnO}!x8&-K#6Q$DjgLOvQp*G#s1fco zOl0g=Yrud3oBm~$bWFcxLU3_4irw8gJFOZLQ7F+C+8lJzy9~s!8D9%pDQ}fmn!&9i zvgcKke&;H!w0P-*lY8W6K@)!OCY-C?+vU2p08@*$Nb&|bT!=hzHAVSdeC@)Y5+8rd zcsOCb0}NyAYU%YbWQ{agXa+Ipm{=vfkL0049u41p8|Q)qs`Ix=KcMktF1`R~9mPR- z#1dwKWx7PUXl<#OjK|IbQp)U$Hg<#@-D`F1h#6@&C!v_zmTS)IZlF57?=kWVqFj1L z|NgBzOFl86+GdnSA>jQmH&?Nt-P9@Dt2n4c8kLD~vp$`AbE!7hJlQC3!ZCGR*mH44 z8euLfn+SY{a2IiKbNI+LXhttScdNRK^&{Jgl+uD1t}+WN-%=v-YPy5(_Hj0a;%jZP z=1Z?Snm|)jO<(;G-F?g2%u;ku z*jM-PWnm^dOuMFQ%U-Cv&z#`~_09kuucp3_R?D1XsI5wP_+~+Ta)sQ?nagFqstRqh z;U}N>c>|^r;r?g4I`#0lp<_%iKNY~ zVdl5*?+8zN$I)r+YrXst*@{mTWF((% 
zomabx!%>B4HmG|nxl|vm}`<65%DKvRZ?W9cAbdS#PG^+I|0;&%=mLf}4{qYx` z&?Se9pLYi?qfkBz>Qz$3&aaM{{&{=Rfrd#m1fF08rp^)5q?|7OvV5j8uXTs9L2hkI z=%Ljcc`B<22w=q2{_wA%QDo|up7}3q_pT&rU_bL5la3YUms>Zm{z|!!65P@>z-G<* zzP8(lH6&5bLA{1fcM`35x5OL9NTCn=Z@!OS_fNOfH!adkA5Pr4Ul}5{DB0$|Zf^04 zXDxJxJM@LbMcmh!URqF(OsV@8eR#O+?yD%m?JVK1PJp+RUne#O$EqfUH2b16eqK2< zRWZnajeONr%J$*u@u^A?vFkZJF36GQQW?q5Dul?v6Qz}F769#|1rAvS=njj>&tH~b zw2m9(9}BC}tZLD9>Qe|je)wz5NzgUk7UImDn*Z(jt5=`ja_zTIfp)%imHBVD)Ba2D zNez+cs@s#JsgA-;y!S_W8}3^mpWBsx=EX9;zicq7oJzQg8#sRQD=C*Sb)wu+W_g*% zNyhf`#q{taMtX(vCvQ6y#C5)PQ;G0#@;3Z7bV=cs{~@jjKd$M5KmP^$l6rz~`OWn9 zt1=(cmHx>T6q-mwf2By@Qtc^Ai;6Fo*1L&4hSQUJBfZM=F5S`^hAbaHWM*ufF8{>z z>q&jVi>K~9C^`TfqBsUro>H>15_$6OSHMlcm;3n@6M#tE(Cc{d>B_ri!=rqh4yy*A z)DxwbW2#I}-x!=2cmAyz*YIIZb7(Cf^35U6%cLcBe+`4ew5BK0)kFUU)W5Bic_(yh z@N0ZSqtE4v@Sm@;&r3XrE3Y5co?g~WkQ%6}lMaxPQL7wjej<_@_x{+UYo8C7M#8hd z&La~LDWf{kasET%7KNu|qf#iwx$5KZ(p%yapnFF%);j#(-7OH`>P5v+r=^ynI;@_V zPxc!*#<^!)Z}C^&&@-uzy!7_maAb{N%om;}{HScyk%^D(zp&qV6gT$@YJct(7C!#! zeRb=WhLr`RbLeYxT!Y#9@zCiGBLHr+!c=%SPRx>8e$`#hb#*B6wM3|i*-*$QD@B(E z02wHNF&QX3rLi(toOIpA1YCLms9##fSPea=1556)-oh_|liE(Oey1{2Qv3$}gjnv^ zpjXsl-^|Z~pyGo>rS90tPEz#mG<%m1flBDq7zMY|I@_}*FIkJV=-nMcB&^Pr+?fz0J!jWpr$&f>Ce{D zp5jSNpm>5&PoCXB__LbizaEEiZetGSG`t-93^On(lm_$q&5%cuAC05`27!Myi6$iA z9x>{Dq{%yOJNUgT=(BkOJ(Optu!L@c_EM-8pXerQ`nNf*^&fc#4HG(h=iNlk=EY{T z4zq>&Ow?y05Xuz1wviP_ihE98VDX2&?sNMy(NyI zR(Fi%MtA20PuBlFj~*1hI{sy{^4LOIg5c|wbQd35^gEqtA!|bE3+*hhS~p3Q~U(biqyzWEmHn?S?KLO9*40@?t!DGqencXi~7S(ml8~#TyAhW){Y1* z9`F{bjUbtoHQmA!`A5c{ei6IA8m1}}F&=ixU-gfPfMesyPkot(F4QPjr50zJE*?4t zs%+tTbvT`5t7{q%7Cu#Cck*tc?g2mt;=5ykeb@0#$6PFHSs?Ds+i(34{r|Fh;kxI< z-(HInEO+ofj9mX=N}MGIWXFxYJ<`exaxVxNdm?cwMJD7(G--+`+j8lSC7<8Ssl%Ua z3KRuJg7pDN9i5zd@p5r{UMGW7w=(sEzKu_;9?8z>dB6Q6u=uCBX@$@lucmhSRRK@y zeUp`$NZt7C$VdO`WWM|moL7&7aKw$zs!1p~?VmE+n4Hjim~JPNe%iUy^63Y%RR53o zQqTF!&GY3~amT8Vhm57N&PjLoIvDA9{EmL|VL@sByq}t5dSLOK^TzbI2Zf{dJZKVb z%Esg*vmB81J5pJ``|o&EYAPwOk|$f`>XoJaAe4wg+Cr-*2-JyiCBR_dD=(V=!5_8h 
zGch=}Lt^g^{^W+Cs@dudiX;B{?5l1#Y`JOurdibW@3OMW;ST#NX8RWm|9z_FbhkA2pt6#7*cB2r)fesA#r7fi zCHlAB3@%{Q@?`A2XO%_}I!(j#>(caASn|xV1$j-znlYK`BcgV^1t=*x{s5!Z8J6Qc zojHoJ>PQO@klV!^&EL;^Xi>NsD?skb19tl|#vF@JED1PD7Kc%ubm7xuy2EEmCaf9# z*kz_|@E=Bl+1t~amFdSe42qN1qL4S7uN3zIb+gApM4Hj>BD}gUb5>u6bWGw;wa&S? zDBs>Rju#tyTDmr5_VtR@xR6%vZ5Q1bx4du-<3`1%$n)~Iu#O#*F&+*s*UvZP2#)f2 z{E`iba!)++s#wVuVa5&h0HMsVY)!n#jzwTQI*a!Dou>$OID~CM9`G-bNok{Wgrl6Yo<6?(mll?X4 z!$lu*OD&zm)15ARd7p6$XukL2W!jOlx5O__(|LR!JC45KrV+Q#%9fzNh3;nN6yPK`>WprjNV3C)@C1$R}eJ$IsI^Kc)1O#17 zF}L_Ix(pNeh7y26-L^y!?@*Lf|L(U3((Gbt7pZM~z=5u?cXDmu__;tiYt{Y>y5$!~Zp^!f z5aj{}hy^bEdjxat47z&H#gF#Y7yb6PwsD@8N^@%lE5xS`+kM8j3B?`_zv{r*#SHIRDh`uGWYROJg-7bg3_ZyYEXjz^6E)Xxo&X zyhLhzLcFIJX=EQMsk5S+YG53-xpm>86RVcw4&;4YF-)}b5yB*+|Iu7}iZ2hu+%n+r zt*7@Oh)+N0;;n55YHy4$-a)QR?oP5K{;Y);+j4AE$hmNFTsN)B$hUua{0!po{f)`p z8{fJH>L275hE~p(E(rf=NP4Pu>+RJK=PsxAj+Qqkx|iM9_I9o9EI+aL=-Zm(r^Q1V z*IyJs7jrwz32$%<%90_CQ>T=gPo9?W8(dPkTDT7gY!CkJwy2TrOO6 zBG~(Ud0GFB8eXus;&cGqyZ;&P=r5IUW#7aZpKBrB!tb4x@GmrnU!{NJiSoD-9K2%G z*HSPM5(h(AS*Ha*h(^NysMafWa)IhW7i8^2mh>`lO`U?vvp^FHX9d; zI1cjvNXQsBqSQPJ&3eu+S`ZSXF*xh3;6a%>HD>XWuQ)hqEX*#rT;Y72<&S^HRuPxf z`acatn^;x7dZtKTRkf

`N&$ky;(hBR=n%K02LUiL>6;&~;Fi)>;@Y`N=cg+LJ6P z@MiVueE*n96pu}w<8YeuR|%gqlXs*3KLVPsKVFj!T2i_Md!g&|R{w`|zEqETe?aiV zGuIM-+6^avx$&M?H-_}~wdjJvMY1B#beg>zUyirSXaA+J*N(xrYx9KcWZWDEUAKa{ z>SYy6%g_Byjz<(rCXvxo%{OZnQ+d>)|I04FXJckRaU*4oZ{7XaUdkU6%??M6k{`C` z;zZes0GaRGG!U(-sOTj$&`$6xt)%a)8r-n$N)GyZR2g}aZoef|b=LROGod2wS} zNcHQPbd5>DYZ5jKs@+1Cnbvn+@gS=A6#}7=a|q(vO?CGY zang2n#Tymcnkkc?Je{cy%tO{FEI_o>+PeBS!cvR%7v15@?IE_nMpyd}=9H421#J6v z{nYAyOxLjc6bOf;GqV+NAqt5KJ;)TsbbDF^T?q!qCdoJ9;8ZY}rOQ;$9ZBe1ki(pF zZ_GD4QLF{AIr|$d({{$T$X2$CU4qLdUU=P&5Zdb_l;ScqzuIiReE@48h*dz{6R=Gv zNoKI*+>LIo6VqS`aGtAz*ISQ`?i6C;`y%QZkthh~|B;oSv zIhoaC@%6H`dc=BGhVAnOj{q>bH_iZdOUUhPw(NEnTNNv zA{AnI8Zym-FDj=+7JT5deEBFX@Tu-gCB0DIaAoJG7Vnf~o(uhYac-MC+Uk!!7qs&2 z)frCA>;w2kO%Hmcy2>q#qTk`fKI^P#Mv0mebB^=8`e}8Hl7lq{ELv-_ z%RcD;R8Wgf770_oys)A$l>1pY2$5qPm8+JR?w%ifHXp)zt|P4x4IT%9+ryx7BsA;~$ttAwGjU9HU}75NqZRpnkrbyDtI=P6Fcr znTI4LOgCggCr7ngc_TmJKx&CrJ9b6A$MgdyWMpFhQ;nDNo0xvPtYDUIQnw(17&ZQ$^`hUL-leOOPp>WQ5(+TF^~3>I@GT8F7=EF z@}aV=UEynY-&}9i))i3HMD}whOFqjR9|2}M-=)p%#2Q+>JD zkRO>uRh}*y;7$!@*eHd%L)y540L_QWU6{5H+8;f!%LjmXuz;bK&|}h4+bJsNo-9tN zR2mZ<(=1m>`t9z~+OCUmhl3Kac|tfj$oYjtg&hXfQXju5l+O68NHa7FpPX*Qq8!HV zr0hJ{yM)#vF|rQ;p%Ezf_sPutHPGjn@QuBj&XIi?_k+PY42pz^P!XgEK8+CCWM7Hx zE9o0Txvg`2@EtTw)b)3qU^BPrn0B#|EhtE9N%WgW2qY(!!z!NK{ywr;%ct5`Yu^?8 z&Q!6O5~z%|SNuuC+XX10q^lnU>OHZOn(=z7}|SyIkr&Z&zmlSXIrk(dG>RW zh3mr!{c%~m(K?eq({@e1gnY8Ul@bl;X;xNKZ#)#EhLSG%gMRke3ZzVb>5Lxqp8sRIBoFUmmFwwk;wU3H~+0wo?PDy zyYt9`fXwGdK3-lC`}Og0SNa+M>A>wcLcratA2f=xhTV9-ha9=P{ro{{%zuhjum4>Y zvR-jBoBn>{lds#UW5LQ3FEZ{H3M_J^v4*KanuGK!uJdt+UxxYU@QmGlkf2Q(FDXcp z_ttq!KK609;l#`Ah$k`=2rh6KByopWUdNZCcbRL40BD zY}3NwGJ;2ob?S33oyxLCPEvN{w6xVVfVStOTcjIY??XM0S5Kq)WU~lWWkNqCA70^< z=Xpb#Pp`N}rdsN_0pvqw#tm%D4)uM^{COtwg`FR#Q^3mGoInNGc4ff(_2=ffhjN>x_5 za-f09C8NOlFRcYbftqn|G+$sCIgwj8ymeP9Z;m@}W)0Y#FLxP>5zA}PP`Vp2I1wVe zA^_1@k=0oa5fG};9uv4-HQixdDZCQ00)3s&6Ri1KQhix5Ypuf})$Zd}^$Si0gKt8O zE?&y|F=3+k)Vp`)zct>p(kRP?Zz7H9A8}@JcNn&Y%0G4B*A4)IwMhg%kTQu0G3dfd 
zkP-I#(}DZbKm!GMxr;k$uU&V#rUH4*82mA2q$}(wGQ}lhej28*TMbp&=SRca+8dsu zO4lD80JO4_;D2_&9mqLGYe0W{P&qb!H`(hfn5d1yrX+NRyEjTeiomp*&*oSvM-(OM zqaGvATTy)`W)LExB_a?p43|yXL3|L+t*qUE#%$dGI`zTu?RCOEbs7T}CXqt+_8a~V zwbf|3djR-qJ`8^!s7qyqVKe8-4bd}d(LMYv$Q|U25dRjzMTE%Kk_YubVTA*0xM_AG z)4a!hmfWU5ro|8ts1T^YR3d3$fK5-q#fU3c$zoqqd(@0}qoHFTVnuqcbLbgk97B0o zRI#TKN=25lk>AP1@5I;q4*-yhFwfIc0|jSoz^t^G$2rGB2r~>HlPyeU0WwNN5@zh zPdrpk!wj1Zw8mxY@rzwO)OfO)M~i;F<&^M4QEdbH8M~iVWrtrkn1(vrD?H;m9Px(@=$z9jHEq)De0J;^ph~syg7oUu z`+$j8W0!6goyjl>TYf4T5)u4^PkNbGqdcqdc<^}T4eow>cicI>jV894#BT_ok9p!QabB%O!*_=fu8cI62#!^i5(XkDdDYW&OB4!s3D$Za9qmU5Kr zDqmVdY09TgB+>ps{M(TC6HnC@oIeKh#q^gpnds|1?7G@tOz5S*b-omQ;jqPMs`d%v zyYKqmCT~(!j#l}JT~t2g;HKyna0XDfIWnnp&(b-!`bTK5a9(A@3oZXoJi5keKO_tN zBZA^@2QS8WiLj8*kR0`eh*+0yga?6o68uLVlaJP9{?(*{#nXH1D-sU?Q5WVTip^A? zMMrK{Z?1xXMY115;n8dZS9``i%}x?3_*^Icza1bI_*ffiOCJX3APD-3=TI?TcmeKV zEU* z$k?#D8|$^C486TuAs8ipr{wY`v@|CIL+w9aB9Z>rsKlN%!T7yx&@C(v;l~~T`k?L= zINT<6s+#pzmtH>DoG032yZ8M7kPU3aLTcB1nWJ)FYGQBlzI>?MY>&ROsQDwcBA&Z#BL= z;gAaG{X9K-Uc6FLCs3vNM=ZtooZh(BB0E(yy~9K8WY??ngAVCDO2=>4WF}U+I{qap zj0VS-YxK^mM;vR8D<{xRXlmB=ip3_k1}jxnf5=AhM7)r+)%iD=*(gcjn5F z&d|$l!LKGENN>yMzLm?Gk*TKyZ>4CpVwEmkuNIiT`fjXFb1&{Q=XBw?+I77(+g{m0 zy9w#FxN(DdM?C;zS<;b^!7(fy6U3p*V$7%?_-`f++DMX5;=9G z-mi^5`dQ8dZ&w;s_Nes4gXhPb`c)l=KG!^TxdDo>agKXg*1}6%tJb>DD_Zq#j2UxW0POP z^hw2X62@s)^JS#FKUdwrX+A1C>hVV*MSQ~k0{}FT{2(LZxzGja*{pD}MZ)akx$q4X z>q*xj$A2AO+ch28>~&#|wN=tfPFjMtn=Nn0E;Ha*BMcS_Cxt^n$FK?Ob)_|9yDCEg zX$$h8TpBaqKBuveo3qF60UsNU2+)AX7=*6J-5FZ)$}-EV$c5>U7+VpdJWPjVm$z1b zw>nUK;{*OeUNbvZdI9}0R+ZLVE6@>}Cvj=;W@$)GU~^+9>W(;2JY~`FbI@!zhI%4q zp*~mVVKE$(y*}+8n4Z$m2G=8qSy&tdQ1oS0;eC1^@|d|wtvV?*JOS)Q5)0F!2Zu1$j1u5HdP z9J0qh!oJ1?ix;9WsSE22i1OTNyliYNNGRl`W#IdC*j+iOwZ%;=7MIGId`9nJj#_HZ z5v$s_u0!z&tqv5HRgqS}fI=6|&q~l$eTn*07TCf*#w zG%yd~8h#3H5kC^g*64rqcPAY3}bvQbDcAP>}X#75wsC|d2ZxKyGV0; zY6XQNv>giRLy3U9FFwdxZ-W;i5Od98{3&FF?k^H4aUFI5;6{V35Zj0j)f^#GHFnwvZgF zp&b=DqE=X^FC+hrfg5g9uK7Hp&S>Na7Zes&^&>gtz1VI)$(R*p-=T&5SZ6MYlIV2m 
zWj-hrbtl6}XEP@mO;2UG*TJObSYOej`nIY2@Lt3?O5|F9OocwBr)yyvBuZ{I{0YyK zeH;I;L@WXiSpN%P#PTlAoTIV$6Y;yXMYvA4vmV>U1|1{R_D6_j?8y$)KUqA5LJdrm zvPyhMx89hz-zJ>`5;h0PSc-hbU|{pm*-Y;_8q$*&l!%v*tt*F|2U027c@iq$q-}oV zi6}DKh>6#jk647A&x26rSccsGKG=vryM%xfqp&GE87hsO1?8{$w zY&GdS6_P~Hn_Q`w>&RZUs)xen;TV;8s4in8gE0%H73(HZcixraL{va|{ouVuD1W>m zI0<%~sRe|RSP6Kt6%R(xU80F_wq&(7&5zM)!!L~^(Wy`kv_VK6SK7{ix>ytc7qtNj zgUI5*1HhCGguu{AQbRj385M?GE3mw|zo}qRs6EXFloc@p%&~FJoZ!}m%PR(1O0SP@ ziHK8|WH_>PRC^k&-LfrgjiHpNUT+H^#=>lE67)2VLV5Z3CB2`uL6X0S z0bxctCaj}WAJw<+{yrP0H-E&?vozZcnTFy!>m&-PSW+#g;c?!7J>S9Btl7SpRinXH z^Pw0FWX`9@77CNaWoXqhz6bG&EnN3?(LNB})zDRU~5j*eY|yel$FY&O+wW!VduO zL%-|S_P=IzY_#!;=O8^5;>fgfh=6z;^el~P$-26y5?9XxC(847EwyeWl!y*BZ>MQV z{`MzgDrZj9(6W6@lp*r~Ab8FPH~3;BTtVLWZ;=2YsIMsi4Qt6#13Pg(Pd9eS&-b?efdwiU zXf;MPI*i8FGS$*SORyaBhSEZpp+LiquDRwWiHdv7wA(v;yAMOUPcdRW)6hcnctet)Mbek$?NNEEOE%Zz`izPwN6do>pOK{!gs=_wMid0h)ON>i}?g z1FhvXh-w3+l8Yt2&RJFLj67ro4ES-fHvXo-RN6P8jUW){U;C`@zDoX*{29i`g?SbS zy6AQ6G?_spnD^~$&>P!#fY4ia>{yC)=cu{5*n6P(E=mEA~Qio)U$Wx27Fpe827IfXsb-cbMQv=k$IA z+S3P1@nz=+ldu;1ULtH!xI{Q%HoZhTv12Y`6A*!`jjHHXtaK(!_GL|7bo=)Usggb%RA(EkCMCZEK(F4GXVhK7?&Pz15 zXd7#d@WDaK=zIE}wL?fMF?p_!QzAJT)8>d@B)Ro9{+&Gmy*V6X+XVw>Bj1y#$hE*W zNy!s_yJ#?xB{?v;<=Z#&Ix4|(W0xrJL}6~EpJhl#`ZWHUZTk` zKfQ#4-ck`KBhg+t^5P4Ty}PsIzeiY0-R>%i`ylB@%y7dyhB;b(a{FR}bU4M(%+XJ3^O5aZ@*fTZ-3os2%FV4u)xof{Dw{YUNM@Z{ZYRlwa zSoe$tz3su=49`XlZ|2Yb@b(8iv0u`d2yZn0sfLYEq4$P0><~Jb$_(A{()!iANo$>f zmUU4)D*wTg+XT88*du+!$_>m!>{!RnHoNNX#YRrH_Wkx>O-~O1yO0Aw$1aH6E*ndp zgIdysyqV)f;QK#uOVmIaJ=C;Cxg<|T^WmkyFRfv8EX@n(8$sYUjdC?R9viHX zV_xg><#X-Fn6au%6`{WH@OlLex7cm3E6`Eno@?w^#laXLYh>3DpT;!gfUzBfw;6mI zZ?oI8Ik7kAJpH;+ys6A26$uhGf-P`Uiq`+#YR9$$9nGMe>|rI4Jde!xuYrJAe1B8l zKY71=BwlzVd5$9$4wLj$aQ_J3-qCQtuAXQAw6*?%|6F|J0KnDEiLn|%Ppzo0xLIL1 z>}~@kzP2`7zx#ziFn!j z_-3?KNw?Dw6xOOPkKDVk?Iq+j3R`-%#A?MqVLQ;#cW5FI!Pcg{IiGO0M#bLG*gH15 zeNESc*#`i6J3oRW#yq^E$Yii~x{PRyx@YaZ_t#*O%EGJU$8?BA$(t9$Xp0V3MFFw~ zi62;)Jvw!voN_~aNzNa zpi 
z&gsp3w0`~m$~cA~`g`gWcUySb@7V<&b30l8fclCrYeCT_ZWprcV*^Gjyh+AM^pa6N z^82|uAIiT-jr;z0Dqr6g`;C0+Sd)3j_omx1GeODc3JM&REzR?U@~Anrz=AIt8HnL> z7grg*3A=kCHE+&l;=BjnHKvq~UgjZ`zq8g;eH|dq6UaVywM>WguZ5*OO!*#En;4i5+(PX*XIX ztrc#NM&Ifg+n~?>GRQ*?g(_cYanO4Se#be)s%JHIwVI#A%R#lYP))v6GY<(^Zmc$E zXhuI4#Ee*&zVf;gv0hrWQPNPSeL5vkn*6GO*=+bpHfX9T4C2Bx#}eD__RKL|4H>WG zY~dtda}gmvR8D5P2q7Y`7o)*MgMrD+wH!)GW3~D1@%B>gaU2K=Ij2ocjN=A(g=6Bt z4g^A#3lu)eo>+QNY<3Ym-j;MLda0cj@GG2ltFEphB`lkh)wxj}gFs)2F&Ew_+4sd< zIr{#7-YSfr%*N)=Z9tx-Y%IWkdX=$bXrazKkYpq5+3OX{+JQ&GO{mpaP#1C9^TPfJ zFs17!cqxTdL?)0o!o-j;@7aAn`G^Eq1^pk0LBMM_VbdQ9RsB)Eb#w9IYix2U(g`d;B`{52ZKmc>1kzK?7(Y4SW>!fJ8*&lBjUZz-h<~qus!5 z-S!CT0PqZTncI=cRU|I3CQ-z8eXptx;&!M4JuHn`N8M29ug7qlRd9Nmcl2CZxm^Se zf-~~!nmxMVDJztSc9E6llkS4WL1!7}JU|J(6O0p|kyPg~T26uWSrc zwq(5>8!NiVg2FXQ#0O8G^)qun@88~m-jJn(QW3-uPg&5T`L8rHM9FTNMXEBC?ChD| zPn@~afk4oWY{k3h=oM^t{>r^t9i~?@ciu23wT|TaP-7cYyUF`kh#BC;e)C>5hz<** z%7yBR=lL-5ORCrDRQCsFDinwe^hlxx%mJ$OMVe9Hz}M*a=}$xLa|HZ&>vax4gQI|k z^X{303OL0RT>*=$=*AxSai|Zi*lJIuFnn5NnG-hjieKm!vBZH6JF`Jys3$SyP_b^i z(>6it1dJ6p>sIaQjoIBGrc)uegWz7X_R>$-mSg@FZN%tCyU?H_4(WqLPcK<5ayR+e zhaRxSE`K)CRsi7zY;~fpnNna5y*oB1!kI>|PDEL{+$m{408F!6Cwp(PHgkaaMf9}S#_XKmk0fU~KKvwIp|9ORFB zYJWHfz5_DfoAyCH&9 zR4lxb)F>qQTl>D=9iyA=NhQUOr|Py}Y>qtU93i=e^~7o&0PGn{JcX|9vjfZV8!&mp zRwZUpg{<<~ z5UdibLIj*m$y=EtPN=e{U`4ihXeZ_f?_e`rXm0kX85(kCPUqu|Ks@A)d7j*wZ{^O= z`PvOVo%7C_3P)M4@g^2d;Nfq&=DDB~naOrX z#R7pKko-eY(#pjta96uTGi{C)3du#bsX$M-~Hl^HHwm|YDD6RV1z5_E0YaR;x|8+W#n zMQCG;k8jU)+PStO+YIjK%%dH;y(5O*x;P0>!$q4CsRsa7Sqx&kz3V2n6q;*OF$NQ7 zDG1i~$g5jDX8hybo2qZ$NZHqh-ATlzq@>G14M)Tr#-uv_VYyGHu&18PMFd5(AyYc3 zBmZ0-9#_Zz8d#LZwsX^;3>Jbqk*P?L=vG@fRnm;S_E@4+*Ro+a>X%O!acF7!=_^+HbmnmjM3s)s%K|G%OTeVsY0PHTn;%#9rN z8A+O#1G|4MK#1**=)=VMkiVo1 zNv)Em>TRzCf&rGncIvjuW&gJ*w*lUqY+uR$ob7u*FeO%DT?sb22 zw6a#U6_-~!YO?8{{$*$ZrnU|jS;rjTkdI2CYrqr=wA7b;!sXyuE`)8!36{aWDyxEo zEY`n{KfPa+NJh9eP04CryhA`=>|%m5V*VN}qW?ZxAe0KFR z*uEY&dI(KLsHzUBVq>Pk#bWTXlDyEPu55VfgOPM2EfpBE=@CjERCGCyX_a-!RXkhk 
z8}RIkmsoTAk&zh883y(dHJ!Z9xfw6#?vi-&Iyuh~ufQBjqcy8sVBC|HriHUz_y^8a zc%2UvAF|)TOEFuwP+h6}Eh#wMP#fm25^fvp&Wd94)4bA}N9WlM=n?@cHwQjK1NE6M6yOiEJIYJgD^cW(J&DV! z{R}6rgN57V5ppH8zsG2v7TsDt)HMZO8I-%D(icpw`3q$2akg8>5<0k(X0&;(j+HXC zSF1!Rg#EGVLEACErN>~8L4;WqkEtlPo^W#U2~=2)!c3b8YATIso8Ao9;{GX~ft@fl z3JDK5W>^a5S2!qT9D zyi}}1S)3828FdpqFL#V2)9V96^Cx3sWqGmPoRRidSlK`z?4iH*0+@xL$HV7av#3o* z3vzj4t}~~X+&HqYv6f2GW(&l#@W+%NRx*O_CY+)amSn#3dt<~3~ZIksu9VX;?V zOszrpBKm`r9eH80F1`MPm&G5<5uztJpDo9%5b9>;qxnF?qE$W&`>a#Lb8q(z9^=lrf;fC1NM3<|lA)$$B5RJsxFR~T*1C(w)Sz1!dfIcyH< z9+nA_?Q3=EK%!9*eN~1J-aTBZSQl{vr^JGnpkC+btPB;hyZi}^ZRDnvph^S!1OuCl z*6U=|CNB{xKyjUv1&7BfDT^>8Y9BI>GO(>qYl~zAfVZOFj2LMj0IKdbjl$%6+~FN? z8Jd-5xL4LU7%)QXLLq`33sGNkZLI(&%q`*eYIkhI_e;nu%$^?7aKl=|Mm&|y>isV4 zo&o+|l*uFnFSXv1^I2CJsOVyUw;DM^{*>#pR;)~gnvS%|p~BL-xNYy`^|^}OH}n-6 zaDjnO+;KU@;P78$J?QsJj`^T*l7NxL$)b=uXbjcUwug1I3*ACsQF0^Lo=&7;!+bmP zM~9Lh7_bnvCTN%=ydDb7MuvhCylTn!XmoaaVa!y_>~V%%OW`_bC{}~z6ApD@l<3S6 zGujzo!cHmGXAnUj;L60THUih%DtK4hl;)vRPtXF3Xal{)R`|)3y~_e5ltEa$%jd(O3dRH$v?4o>n|#9+lnyTwCOAq>)sP46eF4tHT#MhsQcme zy9uu_*6X=72iVQt_E8LIZoQloaRA6#FIBI=!Rxm2`dRG~Y?=9KkkZby8AM>HylWO% zu>!@D!Td-sQ3OX~zABYUw1M#E)|Tv6TR8%C=S0Hh+K>A9viNgZIFM{U;>)w?Xf{kz z6N(+YyWXA-ZRvt0q|G%0Pv?ok0!Y*;3XLMe>NjViWNGFpXccbU^&J2%rzGMov_FVP zPmf&aVS|}hR7MPk!@9c!7}`5^G;KXfvezI6(h(+L)eVE-L6rYE{R<;YblO0L#YzF* zstXd${FcE{Vi`PMXm!!0w_bpMqk71xGCrv^p-q_v-LC5;K)66eGPjsl&;2HkCGLEX zf6W9ZEWkmWKv`<+8TF#|@3u$6T^pOWr0h1Xa7J)!7rYTfiP0`VZ5mvkgMWovf&a-{ z;NN^gKI3*=bdWL^B3_ciJ)R?a?adt0z9`fMgD-M-PM7l0U;z@Rwg|ISRkc>f$#_6v z+bpr=P$EGhNOE`+1}J`XGtK(!91{o>(>DxI-IZVQ%cyQ!VB()4hi!*imB{N_I_?It z_teY1&6`|BWY9bx%`hn6rz7 zzgT+xIYVpdfh{TK*hypCYfCJsc``>6QMd4AXR_5?ucZHj$Bm2}yv|_P8cblJO<@5| z?=F@jGd&>^kjdjTLu+@J5L8U>Ryz>I&E_x(zgiHI&7C1vwdAF^IfV!kcExD_W$$G| z!N*y69KE>~G8b-y$1<8@PcWJET&;USuowpRX+-fg<|VoIELMWhGpLONJ{AVLq7pV9 zBZF=B9_W$lk_A0Qmhh}|R7hENBq~c7re;9Byz+5_IxA&}IUGJc02Cq#;gZfP86xT^ zd5ne4l1nVVIWDf$E+0`0PiF#oyXUBW-#`LLp#=qcvC$WLXhb{bbT?O|_O@UaFTV|u 
zMcrRieZJ&@bNDq-;=EDN?Kt5Oed~p(rt;%6P>CX;>s@bt^WGS55>Fop_wppCHOff- z5ZygJtkY12x8Ui!@?>)KM!JPV9VpUON9~8{6WL*dlY0F3Q+29u03z20M; zVE+biw!is@lP5VmHQD9)F=$t4K#Oxv-c_+L4r+Wyn$`q|K981{S=q;a4mwAM(xd_u zoZmiAE4Mw16X)?Cl!kVHIAm$>+1;|%>Rch;iLeUhX_1*uZPG&?@_x6!i ze(5hl(caY!cR5u`QAAX_(h}+EdTnQ)dGJ&t*K&Lq(Nwv^--S<=`$m}#VZ;QUE#d7t z04y8U6aQ)5?Vx%4GLUrQs;3sQx$6~t4eCJes&M(Yq4h9+!;ciPY%Yr{&fiz%6!Y@} zbHnrV**5x3RkD)tG$R**6q1HSqA>WQ@!h8*IesJZhU}^WQz+Vy`-N|@;{jeE))o_< zHIVEnMfK>vj~cu^z_N!Ma_PyGg=u$Uh)37}q(n6@Q2$-EBrxxdjH1-nnfHVE&V+FwTrYif)HeO|2eQGs6iZ4rX`E_^Fyl9bgb`iRixxRL-r(Y;>q5c(L z7KzWw4kv%C(dg5!vyNxXi}F&9S{P|)ZI#45I1s!ClFvp36qjNP6aF0r5F;PdaY+7Bj6!Lr9<`$zB zHf&U(Pu{oHU6ivusW!nWm84Vdy>1apOis3N$vm?x_j>(}#Av$0Zj7xqJ?7rI4la4& zlF;pKvwHR+fAhxwQFJDbO!)sF-{vT1xfx2#HFM3JsU~5YBbyC#hsoS`AvG1bV;JVh zu`xu*xg-^G=PWXP$`wmKA4zw8fBXFr@Av!le!ia1=R-gH$?>b-uYVhoFXas>eYgFR zM4J?I0-52bP5!qSYFrT$?>`HBE!nQ7qof`O zNq#N0e`D>027gq*%V>kPqVR4^u@lPA(A1t;iRx*F|zbuWm^!(>r zZ%LKOt-tB|?tg(N>c(SET$%iF4DoEst0Ci$sob5dw*9BgKOIAZRhuU*ab z&CWc{R9i|smiL<{ikvW2->ew^k8=XjTBLUJ;7o|IT1&{7bkluiQ`h~+7tKd4z1PlK zP3-rcxH$DaPwLo-EB-HzO<5E$OP~9D?j1xN<3-{M)7Sp`0*9w}ujwS+m9s|ullkM+#&7MHKYG%C^nB7o-~DWs zrl{w)f6ns5S;&@7-UqMi`Z7Z#5$2}l^%Lj4gU&#IZ%5;bE-To2#+^tWs)BtOP=EIR z)N~(j9qIBkT8uVDiv| zFj(lhPi9x^wqDyewAb4EF$dfpa{A-KvkKO*ki$DMORYmd^L zXfX{9mz#ElYkmJ3uO%K68QuTR)bo92zxD3DXXi^ry&7%B1;pP6D@wDUm3-{(46j+2 z_}gzyfY>CX@AEHJ3tG3^3PFiZRxiyToZ1p8jmgvV(w8GWfh$%aW+82V_d=)pUYM?L zjvV-$%Us*v`j`OY-VhF->gK-P?XFWOjX1F~a_z$Y4T<+-SCcb8$XD12=pybB?oD5v z#;y${kNwb>8~brXIXms?{*B>hcT4}{)x{OgvHl(0|Lvx~wmuT&-rKKdX`z{>set^J zN7%o$c8rg|?q#&$S&o;M!xtg2c+TSftNlj1`Q9gaRcD9Z?_cE!%({kZVGTdS?Ixp1 z2P9Y+an0W;i4nB%p}kQysPyASnG^PvR|_j*m!sOZI00)n?9)C+b$+RGdH(mzzMIO% zQbl>ufkIUO2}pzXd5Kp^{g>?Wr<^Qa?)%$=V}~mC)h*HEDYycIXu~M8;>A02B3B{B*|JRSwTc+wY_=k$Jf0w4)R{5NIsj*uY-@ zjook6ywjF=@)PlWRQ|Ic7dNeZwW=zg?t5Jnd106ReqZxerqHU;h)1=|@h+B&a_E1p z6~0nK`~4T%ujXmWWek^3U!Cvgd3D4N-jd!A81G-Zm+56<`Cr5B_AqwI;R!f>Vu|*G z@8^$;CRvQ^~T+Z75)yYIlhcNWC`@^603SVxT>vP>R 
z(&F$4a+I=-@SbZRC)C^SOo?;V3k#K}+J;siN+U*RtCbPI1Q(;kd z4BO4qn&0(MUftUmv=w|PJJmZD{M?Aph*3om+8f_$;RzpbQ`>pn1j|YOX zcr6l}XFGbt>u%jO+3(@Q-`DT{;NXVt=slh90fO_Xy=8b%OiI) zE9#zAWGmhtTvN-QY`+vCEA7N{I`Y;w8R7Pu0yhkAT@Zhl-l2$HeJdD4i7pw1_(^jePOc;u@0HG&U= zuo0upeykmTn9zy3GgDiJ0`{CS%%9y0WH6s^U2pEY&kp$aQ~c@#UqnIk%5C z`_xVe1nNDz#EY>Pjus@RmC23DTcDyZv<_5c=`@?KXia{Sk!RnM`u^Da!W?>-8fO$Y z3ajA(wfgYc{t*I=sa7lb@Tg~yHCu6N~oDTzPZ*O7K0AS|1OFwew zCiEtc(i>~%?j3+bd)@jH+IDoeZNXfQ(|N%Z7Fk>Te}Gkx-dX>Qs+*0r=?VP?ODvMY zFEKSoO-Tp_9DRIv#?W_QH)VIezrLmV_aWrdT5Lj>tM#KE*iYr zB7_zsOkBn~vL)2HuIrAYw;WGr>2N+A?K#)EvnRpPbXb zJdQHGmKhoC5Wg3n^|fptyvuEUdi`c=Wmc27N?rA}p_v`8F%vojjcM=9ycA*Ig`1T= zX(Y|WB&F^xfcvf(`n9awP#SdIk2?E5m~CL_mJbTG1?S|xiKG}~LVI6E!74&?4Uus}y zpnpi);F~GJFA0TRbX+Q=81oh4za9KoPbBLgTu_W z$Qy$n6kEVqo#}iDHGh6L(+|!PG|zfYUagYEz5M1<^IL~y8&nVwQ4wJ(HL}h#F%A!p zyePr%UyiH4zWQBxVsqjz_;ZE1-hKI(zh4gjmEUyl|97b377^DTeCN35^uQ9 zWaAjAKQH?e{fy4?AniU&3|)}>;(p1-Ali2Fb@BoJ*9S{ z)M>s4bYsKeiCyEfn{oU&#CFQ^RmU!JE5phSBm1wv7}1=L%{_#$vy}&na^FVFXEWLm zuWQueyJ@g|_IowLs?~UrY1BZiSfAoze*MFmQ(%j>e7%opqK_0Fdu!x|mf|YvPVNg# zpfxMS992R{xZzr}E=shZYPwFq2>9uE&A)tAPenTt z`Pyr_f~|PCv9WVP>J(V7GA}=`KG&ucES-{5yVoMr8IrASzqtT34>1nO{3FsP7d23) zSvGHT=GpT~W=~%~bJrcY2PMekYXG;T8_&BDv^-3qKwf_53K@DCd}bIm;(^S_l$dbQ zjZ4*RFF^ zd^!u8T&Ful^mZDWWp)%i>eZXXB~meE#R5UxxH$e;f>8@{I!-#4V5b%LWXmEh_44d@ z(WKyz?nbT+Z?wcTvc^4**m%o`ym>ry;+?N9U(;@0e$lm|lsM6hM*@33PbJ+XbWOpp z55<~vklB9q+d~%(Ky{y~iDz<&=E=ZW+_+J(*06aBXE1dqkJ4<*U>CIbV) z>+#>zB~b%j{{uYZFRHjq3%YS2H4xzW!0$-A-=PFI2xuQ$)%@p9baScjF&AY0<{j*f ze$b^2Z6k88eiXd65?jGcK1KE!)ZY*E!T#f0iT-_lD)S(0281(!xyn(fOQE&s23+@-D>@8gbd)TCP&eYg;gVcV@M) zmqy^*<^9SXKkI)f$+?#N%qzV8hg*u~_Qj01nwx(W{(CJmvc7*Dhn;O^{Wg3at#nar zs8ua}Xz1xsI<`Wc^>>f_()@$zd)i5(tqM1ZJ9p+4#v7)`o_BS8y;ECO3;pX8I~m>A zV6^uAH~B%?-R!ugL|%+A+Wh?)(T1vzx&w2+}5dk zAp?*6_R?M3+V_5{7BwVGyiAT*Hw8h&0ZKsw;9_z z3XU!^So}nUcY+B>?2a}Q2dDY znXnzXY9X!h1CzPlxgD`HykA^9FF)K3n#VE*)~YA`WR`FB;YKGyQoF4mA@18s`YIU^_z0|?XhGB4gzm{o9>c2Z#@XInv<^4OEq!vTqAM`H 
zCF&PNn}qL7H)nr^vMqeAI;*9YM1!gY&S(&x7zXmXLNV|sv38ePJ#Zcww$Z8frdd+6 zRiZU&N~iT3G&doUe(w{1aJm22Z;0UaQrk#Jw5}5z_9;QqGnKR*Lcj1i)foOF$R-}u z;}>9gyBG)gNd+K2DJ9vf=i~f{Td1!Jm0G~W_QB&*xf_5uqUug(ck?Yv3a_;IcrvA- zyGW-v8#b0el+|lnI*L}+KHaUf*;)X)^sDTzqxWF5v%<|u!GT^22RcHHNAq~+b&Er9 zf(+)>K2LP+lcdDSIceHZkmggFo2I3w*-+qW@8 z3XjN?f}uJp!Xd$HV*BE>+b)t+b%;&SuT$^uKl}9KMzpa9OMpIk?`H3#Sb%uzZ8U76 z@^)4+RG^$GR32aRFyMAmBhoxeub|~Wi5_KtF7Zp?5OAR4mF4 zYI=4X+bjJ{<-oLC_B<3uPpNOQ^j;a;18tm}Pt-Z-F{uZFkt!yElOb;c!+xpUBKm&K zEAY5=`j!r5$80ms#|lUNhVW>tgDpwEI$+g)DYF)%@R6Qq-dACU|-y|w)3S2LfXtAKYtdTVw{h-8%<4d&m)uw;rk6?+nKn4EA z$KB<~R3fK4;XANy(<1Q@k3+jt8-ZgSp=D*&0|8@adzMf)%1qxpJ7|>M0sO(I{7q*C z{@g)bJ5oi2Sn$AFLDTPlDtT5Fm>FpJ6;7CmkK)!Hi&g%w0n zQ4E|}bLrZM%~#czq9WE60k&^s-k`lGtSPW6uap7T+Hzrm5FZazx1UM~yoUwfFQRl! z?SNWBEI#zUwn0i1@eWSpk8C>*%-*`S9nvwx)!_jPSt2q-$;kOjf+;u2#lh|z_m%Tr zgD&5A-N7dRL^Fy2Rp;#*AVl=AqqC;x+Khxc@XK5aQHMWds?s{Bs6!6uhRF61*As= zKe@cksbkI2Hj!#bex&qF`h@N10CGd7u&|_!(VAxELC+D+zhy_p%}J>L9xl0v zJ^7jDeMUw81$>?$`b*6I45hFOH)qG1<4{2;?@YIFW-uVHdzMhEuJqX6)aRXo<R!>Xq22?v$l!_puug%GRdlQuoxn>Nk<1 zCV?nwMd%#*kZclhdS)y+l~*|8FRNmwY~4AG^8`*7(CQjW3%ePCF_EHmh}o z$SNH;G+#^W&kTo1D0!o_2S%y21mTQ}?WdqwP9~J(Ly?L2r}#!M0vOc8FuT6R6tk?u z3s$&{MR*ybLR8uHh4)z!>Q8DVuW!)*tUe~;M};2g)9bxSSxo>AIZW6!@)Nl5dMeRv z#+T}~H@ec6r65cM2yBAn{JsG-VSA_MM7VtTa-{y=b4AB9EVB_Uz}uiA{|)o`xR7CM z4xrBMLTGrv^Ekg@8t=&GHF%Hq1hFxx@hoAu@w=0TS`{Gg#Vh`t-z z)41Zb&lJ|fKySffNBK4wF;fjtE)rA|1K5Z(*bJ{pNf_d%*PKpkz^#VxS#DZ2RBu#W zkuedNuCH&IY>uWDBRf2{-Ttma-=b~X7l3g;v)|3?--AT zWsK&Fo-xmOb8RHGBjx(A^vTB*YP33$H~0mS!vw=#auEsANNQ~cA;JQU zFQU>0LyK26dU~%t1KB#pWpNMrF4-ACs;HQM85eqmn#L8Q&r(|7UlP!xs;`!H&PcL@ z-4|7Lx810GR@}+?L~0s+?s##l4p*4{(NTbd6pC=`&|AZ39i{)YH*$sPc~D+$=xc>C zRGTB&*m34Do==`O-@u#I zcLiTAm6!YwtA8iTTxu`JcHPA0E?SuF4z?GYAg9=)=pb_HO?I}0$4$Wp0rFVn5$tlb6$E3C!jfKfCY}b8+Jm*#P$W7t; z$6MMc7&6quO1&XwIG7`Xx4k9L-iyOf6oW@Q{L3M{n7(siP*G*Dx^Cu273m_6i$;nP zZ>R=>$mw;akWA^3A9-2OpB$mX<44}sGe~T)I66#BgQizT&`7OJR@feconNB!XADZ}=5P(n%0cm;$ygIFA_TKl}=Kwc|W3NJP!k 
z{B$~MAdk|?HF_0;ne1ZM&rSHZQO5DSWixViF>6W&Nk;R-iIuQ9;O|VY4v&lhs&~E| zVZ@iayyFC>Zn8fw8HkE`b^SLV<)f zf-`po%KZ>~luHjUWWOdS_=V7K7fyB%fnA>D;#V^nAwKw{Xp&SFuf}B#D|zXBho_qN z*=+}y*pk4v*O##xo*g`NLa@YENT6BMmeV^KtMfiji$hKqn@oyw?Ngq+ zIgX)|DCJgvnLSQhm>Gl)7LyAcEo+nN{wJp4*SONAgo?CL?;ONN=h~nG9zzt|jQ4q7 zK`60V1Sl5A=q1o(_Oe#pCC&MI8DDOTI2yy_ZS$f{F+AbWin|;)&za={=xU?NgI8=< zPZgEkeASF>=f%67(JoO=SfqOdnFQAxE>)ZkIX$*?HJL(+*H%?lXHfylDzO%Je!_9( zB1t*_1AMDg6CMS+SaQiwRJ*-_@JYX?p<;6^_uv=A*W*H~aZr;%iDnX=pf?n9UuE8+ z_=Th6mK5<9Ky^Lv0fLn9MuH`$;LQ-HeOQS6Hz{kquG!$P1qH{gfusq;{vkHZ27{uD zi(y_ZY4(cuI}3CAtI?WX%k=umqWFclc_e>>0=cHhBT+WwG$YJFnjP#=CyD72Lt>nq z=AD$sycObQj=jhf)t_DvPAj9#V^sc2C6d!JA)_9QkMX>YETUSg| zs@`H;_SZi)W8>+c7lgi+F3SrXrqq-)8tP4_PS0p(rzr>{V{^J@FcxW=iq^+roi*nj zFM!Kh$v8^fec!V5875JUffS=@gyY-!$|INuvYBIa4p*?)M)TK0G?~9DAECfpENIr- z*yFoz<37*WRM*E=i>p1j6lra8&#$Knd6@tqK`o~==~5^W^TohhnmSt2hr%i5D(Yf% zlb}a!3Xg3=ja-~5)A+R0>8s}oU9he9DGR98F}p-JXJOyqh?F($Lmb?+2*^p;5v!f1 z94SPhy`i#k*kge+B1<2X$|aL>S5u$V_&XLzO^V1k+$){#Ir-?KPI^{uGcS)QO-==c;5P3=c4d@%92Ha}Jv9j!lttOOUMcAbwH0z3O zu9&e+U1XFf%geNqPyS&o7W)nKn^rVeK#ABqD)seSY|{}=Mfow+UB351(&pS z_5EK=N_)R#nbUU1eEsD7p3Zjk?(i%2Fu-x6cR!P4qP-P&DC?L7=2renNJd>qtC8}w?m33Ya9UYd(|2SZHb_6jOrnyMwJ0h%h6#iVa> z`6Fl0qx5iA2~rmRji1h{k^iQa6WR+V4&|)QnI-}sCJSrn=;mx9o~wb#rqSm0jmx0redwIxTY5k|NNDXU5bt>kMFdWNxA| zkR#^mSawIv=NLlP<6*7A=L4h{P+}CjZajz(K-W(59te&P02> z^Sw#IYS;C*r-}z5=yLnJn@^1Wnir2*AgF!ux*3xF< zP}x7WV*oPYOW7W=#1W9^`}lgQW03o1_n*;qG%$R${moor$NvDwi+MG=F?qIXJzE~3 z@yS39uKQCDFBHU%a2)Zu<0frI!iEj^u17k*h_&^pIRMU`%YOm8xCN&wX;vb;=7jD1 zF*Z5L-vFKKCH~AkVv+(6c<7n??A|lBYONdJq#KzTg~n40Xo}*J93(BkM13D z;9?$S>NW9zZpm#E3}|1mE2KV8#;9OkZ{H0{@WJ zQ4B2B6MdCT&0Y`zFD!HLbJRKwXmJc^>+)`BQQhJkY7A((yax_#x$p<|4b)txk2g5A zQZp+27NxM5)<5%VQmCM10lzj?6v!WfdTk4B5&vXPrH3LR;UozA42Xh@S(YT)?HRXj zvR;0tpu(*#du8f=hEDitcS*VlpM4i9??nc=CYDXu`dQo5C8#4vTfo&yhYgZ4#J*S7 z$#pL>K(EgJJ0*~=@mc8=yA>;q`e(V*~b~zoxtK!bb-#rApU)6WH;6A+R#;-k+Q47 zx2p$g8HJ5H_xTy`{-nz8`O~Me@b*D|R4$OcT@Bj4y% 
zpk($1%!S7706xfm+!Dqs#a7;*=XMH(w1k{AERI;>GQU!Eu(r@tZ;g1~jmbd7!7bEj z@6YRei-X_oV{Cee7mTW`HZ6oZoza-@3#GlcvBjHiU<-p4OpdSI^_y5O9zT6WOc}YD zFf-l-PgMqMMG#?>b;azk$4;5DGoUyf+Y)bGMCtJ=|K6>f z^-@`kF^tN`?xUrg7`!a0xKn2urI9kML7s&uSk4F=6?9<;CJW$Epa$o&G4(-vW#nAk z^q~2eef?=L#wLbD91=OY$=cFf>NDOG%(4~rG&m-il_BBg)ZpN-6p$|w$lGQ;AzPG! zPs8o}fAWsF{aNadZLM(=5I$JQMqvUihmB~A?NO16i^KL8rm1g}pce{@1A6(pinWBt zzjNaIyaao6GfUe~R-dvMc}o#)`IOmIHd4@HFE*HKE|X-KxJ`ax04LB=G)-E4GEc;5$cQkfx7t_)YBZ(mn2wrT!0vPC<00Sn*E<(y={Gw)~%@fr7^%d$w%9lx$ed~HJ5{kb#N)|VA>-m7eKNOfzGyhd9ihz2_W=kAlVkz3JdnHN~QVSmKMLtKClh}e25>`-@twWfV*Jf)9-9O2Ctk$ls)8&r!9*sBO?tC z`HnR?(p(n!OIUj|LCtr3UYr{h>RmaXNX>+|+F;zw0i2%P?ux2sxbPreS#r%Ow;ij| z4y2vJUI=Uct<}pSvM-e9`m=-%2VL&g@j+i3TC)`9 zV_rj3=~F|qzW>pWLH^c+NuSbnXykp~>{})&GXqU;+(&GSI;1T?f`sl*Ht}qYZsWdq zxv5gg1RfOe$Cy-ADVc1LGaIVDhSUB4FG?Lg3F5~srXT_S%+Cg5R{w6`RwQprle4JG^U+hax*uj0MayeL#?*%Z57YX`(xQE%N{X6#6` zW>a3JHIZ?>*cLkDmi+x~P_CF(x9DtZzT!{%-q`U+K>Y7+`X}|6Tv}^gd3N=&&4Jdf z;#nT&OMCNZdg3F7VEu&}sGYM?t%vQnB-Eyr?g1>iUb^9JTU2S}V4N>Jz$%32v@~l} zLrFEdBZ-KSq(h#@1YwAIRoQ70o)$OCbc{kv`Aei_bxt zI-is}#PAJO4)Z?m91l$Dx3f=LBi8A_UT9D%Y5|vQ>k~EirrJa@zIXyz7oK#hX}Ye+ zvtRrTKCNNqLqV$##dY=ivnGUH&l~wyePKrkfN^!)A;g;hn{^;Nzo4?F!s?B+e8~<(V9S(T6s68|j=L7ptc3|AhVd^SBMdm3K=?0WyIi0W zo3J5)oW_MNsYDPif_K^#X4$fMb%&qG`RKN>6+TZU7H}N9ftj#M?JNPJ1DHU(ASia~w^|uf{ukh^Uqb zfU#T|4fjf4&4|dSjo@WSynaTC@u=%7V=*=hvHgk9j{FX-34xI>EU0_A(&Ytle9%!>aHlM4(UN1JAm3Me(WdV(MkRKVfgmkoXHFh{OB6hvd1Z9mV!c@fk(74g5lbe{tL2q1l(4{0 zyLz+xNwBYzCk=%Xoq=wm7rwKD#*zBh`nbTBv(ZC2#zQ^`d1Rbdzmk$4A6((b0 znlI+@OJaOxwQ9JB;0_%B@W(0ZAH!+v%Tu;{?}(mDy2&L0xQoH5BD%R=Q|frZjBC$` zJSI-6??y9Yw)0W`5O#_NkYx}X1 zFH=>UEQ}pJvrUc&o)m+~xiCW%_}y`-2YZA$_xV8nl%QlVy(}LL-<`5s7Ix15t+cY+ zjZE+s`R{m3vBYs%Q!OrMA)dvA%vS$Ft{xbV!<$5#y)rvk!ZA1+5llJnhey+l5iG#X zNtgWE0?m_1^O^>I#%#`&gDl)0KRwT(&s=2#{pwA%BrVANvW~+M(s@>WtW94ksUBvy zdR?HBoqMEeFodfsV)8cV7UM=K-}Pn%NJrCB_s9XF(VkKeIn(3$dk(t8#}O&5nJ-hl zJO|1tsTJK=5fg6Q20g>#7Ra6ZN%mW+95JmwZ?feoJI3xX=T<%(g0T?<&{R+?z4)h7 
zr|9Qkp~zREJg3lQtbuy6;-trm9n&JQD}FyR6}&bkdswp34Ku-YcAPGh$cb8i8^&B< zBm%k^1R&I3woxyDM+e^ar1GiD>Dop>i9#!$)=(9-qz-AuUxK(h-J{1aNPs9y?M_Iq z+O&7(^L~L^DJ%$iECgu&qU?hiVMfNb!Z|z%DedE6$a>OPr>X2c z$fgYzJ-OPXQ$E=-ea2)|^k>0I;yo0#{udB3tLZ@3=^hx#THFQXMoO>i)u8lnOsGIb zK*KEGL9{sNXno!$J zdS@ytdv|bV`gR2wH_@!xX2xci?dk-ObzMD=pHOb*EakeuGAeKrF*rxUE+`8FNCw~i zDIE7ucTgbar+=_5JiX3p1YRu;FYfr2I!9i7;vXiE^RThO!-Xq*A|+#8<6fKOvaLp_ zlYds+*b%}{8q&5H&Q|KhbGqsX3aJc>C0YPD{fH&sF7%U|Ka3R zIYM10mBT~OFy$ih6DIMCeN9ckR-&QC*lBG>ejJgqwz}Xq1W#OWVHN|pQF_g>5Vk2y zi_MGzv#ui@^+s9;?SrSZ=(x$(1ON@Cd))PPGltKpSdVs-zIPeQLqJ_$vzhNJW6puT znU5{FkpUvXL%CzqG+M2<8=!(LI>STsDXqk?k&$tN#Qs}3OoucPd|Z~#J0);$5HViH z;9nJ3M9I7R@WNi>D~EuFH|u5(v}+H8ntyz0>mg3>%GIQ@M?Gxv}2)5?w#XPFM`CjYwwLp>&g9qQwA)-0+AWxFcN#Q^6!KN;F3^B5)) zIp2P|U{fqRZ8i#wT?pKaZ!B&*Gq`hyAk=?uze8J7*8#rGkK94@cYqEX-O#uUh0B^) ztd_yp<@~Ld41q6qETlNwln*73ZPGjZOWwFQBMov%KOpQU0_?)RGH$UNM@+ssMkIUj zYvxXz@tMIau4^ z$iFP?I^d@sv(&|gQ0K`lAy6KFknSm_EpUG@(>1MrEhk3&3Q&b*?$neqXWutPshbUB zp!F6t?)k0imm@70b$6=Xk2C4_ts{GnK^37?U07Lz0+3YEIa*kB-5QQDVE0iKCS3EC zbUrT*+=2STyrwJ9=&MZs8VFbtF>B71I;VZljR!_+vNb;AaK!HljQ&WiqRNj@R3`FR zq1`oP8K`RZD+0AkvVREDeBfM25T^ELRet4TRI1hc7FJoa-4-Xth4G@xfq$lstqDiJ z0<_i)|659SuvHi@2;;B3jG-q=ngRHbTD_$80p#+cm#Y!F6DVr@STpEC@B z{Yka2dU2tJqV&9x{83^SlWic5QO~_m18K@A74}KZwGS<#)t=!XI|2(?Eu#Sc;UMsq zt{Y&tbr6eQ-1GLu0y+-$T6XdSbgj!-!2Yj1a+)=SzZrI%MUJC*>n**8hDejRIW5V+ zLf0>8*9Dx%nj!?c;T*l}OUQg~scaK=X-v|Sw-<|yd9__cqhLP6G3m{T0LCJgUArw? 
zQrc=BDA;4e!~B?XT^Z})HMg&I@O3plYp>S&#g_3XW)*<0z#ir^1QhTha!vDQEcVUl zipwqx5<~3dH)&|Zx`UYJcJP<-3B@#XPh}PR7)U@_MGiwC*CJ54SoE8L{PyA;j&GM3 z#+L>Q{-E^}440Y&%3BDh=T4~b>ThZk<6uNao#W|I072!FEhe!taE+gaw?V|CCiyH^ zkH|ID9~32(c7ys-n4blk*0q6k+McYNRLF5q^!-BXiG;Ay>cVW0lcz_xli=uVNfDpi ziXkTNv?Kwygq;G&dFWS`l{F-RCSp~m1X52rHpQc^v$ZuH2LAZ*az-1=!~}QW@Dt-RuKBt=XnD$8MOOr z)SEAe7}_AAv{4ovFE6^|5-w@sUGT7z&vK@&O1=;sgft?m^l9uw&yPn}Hd0!AvF%CR z(2_Z~W>KLh8$-{>lXCd(-}q@Y{{AInjZN7Jah?>@NtrIuF%p%G=@nYK7;ml?G$9ah9PUV*~0*>+OPj(Bh4@VN(mxR>p)Ql4N z2m=M!c_aF02(HIjBEl3rbWAg4aRyIJq zpNyoWoA-r|L+Jz{)VatizR@F&&t`{I$4l{1VN=ZGuuRw&Iu+r@Qv8%G#9V^s#yZR< z1_|OqP~~fA@p&t4lg;b-h>qu7N(cr7=);4o1N!(`^Z;x5f%Bcs$J*3o$(XUe82?uC6GWJzO|aMinittK@5G=KB9 zx)m`s0Ou7WO>mYDdTj2Uh4=CCkAJf23Ie+>2yOOe`P{0ggOwt_f=qGEt=*}WW0lem zk#6`#j_e0lOV(X}H7D$oRz){`ppTmfpB0QXjbtaaDSFPRlj$IPMIY;O$@MLHqwwk@l9>1sZ zX{FaSvvC0Be+=$woE=PVlcX-qn$I9Tb3MMoC95S*r|pq?7oY&_WQTu?McFitZK}@_ zfK=AfQJH0Nsd^?M>RC2-QG!CwSp1~{4VJKB_TA zXC!DnlA}>*q}Y=3l;*O$aBH%V9mk@-zWaK$`$FG0PP=17ZU(Th@l>x1%*RjfmoVmy zm+-6lit>2Bhz7T|+znG2MRu}&>3Eo*vf((3w=be}zSzl6>Qd|EAxHSByw%c_9aK?n5g7h;A z3r_cQ@AlcsCa8S`p=Efl+8fI1W#^Ohb&Q){K_Yq+FcF?BT3{N$JT9eX_h-{~>F84J z44=ket4C>bfx$AP6pSnG&a}8-=9CZH)V5{_)Tc+!$o~WrykWpCX$FQnC6zFEMX(rp z9wkPbHSP-{$0vjDp(JtL*bGA{URfjIap29(MmA6qQeAhq9W+K zu-MSXaeaUNU2y+!<)B`*4lSW1;%V_Kx~LdVro2taGUJ(>QV(b^gEU8&J7VeCQN0jX zd=5t;kgXGmtUY)(=rvkBxNsvN-i+ ziTl$9=}6&6g~!RP_P2hx9lioa*9GC%$!RBUc}gXKB!-wdbt(ejQP-AndYuz87hnEJ z1;+9=Nv_~{mPB7iwiVrYqxV1yhbfZ$f4Hs}3vC>gyeF7k94GGRi~wkdRG0fx@qB=q zdTaGk$@W;I_@SBr0q>kw@t~Qm`Muy(?=%Fzr)zco@4lO-Qv++6hxDd(^~O3+B#c86 zmNXoFCF|zF6J*-yU(Fv3*7>r;%KAqZId45uAO5^8uwV{6RUa>P#bM)P)DA~TJuKt0 z&laCBoYc+uh4kgj_ktrpcWZRmfMz;a zf&_i6o?k?&qwzi=scrKw;m^2Ra*KVwD6yZC9XpNF)Wd)&pi$%h6Yp*awAQ}R^+pzT zxl+8h$XiW0-C_C5^?YvtH=Do8CP*G5+!xAihIlkfO0RPaeFj5n)0s~8rfmwVB1QHI z&g@e$g{0hoK0M4eb9$*CFUS0)4RZQL8T7@4|t+?ehtV`aA|{fDCPOJHJA=I;^ymMegOIsa2< z>8TC2+*sctViC_JjVuC;*yE6UR=B_|ieLeW~aUbW_f5t)4j8@}CI4ukA 
z=}kfZqMvylRuXHw)R}BU2xd%EnUcsDOr{7t+=T&YSrRH1k-MPK;f7LR^H|1X3Y#j%4-btWvKCXU zO}L;U(i>z3qYGEPcjSUle7)9B0m`qN@x9B%D>1!CS8+x)!M8M8f`!-X8|j^CBEn`7 zXk$F(HJEpGx;~~1hI}+K2;6wj7}Va_zNKj!bRbHrmSGkliXQqpP=v4x^NAuBg1MBn zFNBKnX5|jMbN>P&ENqr^+r4Sk0@G#UY|uJ5&`nvvKm)IN2uqlp;ceh$-8>gA9H=Gz zx#K>9B(euZK8HPdV@%E$$DCz^+CXEc+D8mQNS9)`Cp9)Fz@KA?JiB-a@8X)>PN_5( zPkgHMvGk)YknVJpXU9*L6wbOYylr!Sl#b+O!A=Y}>hWPLZWPV<6?MMN9BkRgN|Z+ZWX>%v&~bniQinj((Ww`yt{wI zCM6^`g+xzIZ9n}6X6^xcHV^{1>}K&RbHdQ3!G+pf&e!$EDuGl3=JWnqBE}_3K00OJ zCL6~(5r*yInjndupFizkDn2a4MR%K~RNI{RCb6-Ndh$ID?8Xt&W6FklB?X$)CB=wv zC;Y_2_lU4uSea|Em}5Z#-teV?6bB32bU#9<4#86!Y65gm`4Q&S=alpNt0T1%q>})H z?acd*SONWBq$Dg`cf11$gRC?gAz~1la_UHRWznC(nlsS^1|a%P`d3oopL)IuTB1;x zBNs1dOUlv#w5CasxW3vI{_BKZ**7XSTj35^bcA4@_jJngE4%=pH+lxTfsU(Km%A#X z8(%YT;tnZm0y-BbfN!(xGo6n;f>+M&Xe829&W~_IR5pLBXciSL(~sh_mX6k5-p>B3 zW;ILEp{sqA%&T?81@klrM)Y`>hT}J#tk5Q8@ROKbRQ)F6}fp{?tGmH(x5dZ1}Ip zEm23;m0-D1Lgts!1;F&D74^>F?H%^5Dx~&PXaFuPx$ zBV2in8KV28?7E0Eq9~?j8uQ?kz2$5G)BvH@0C#g;Q1ZDxl<=0nhk*7-&`cR&(~yz6 zuLBhyy*`!LqSoUFBVSTmxTseMtEx6R)iUdXK@mnpyM>9gwFS^O>0q++QIU*FM+I}2 zPfFeVh}AuTPzTn$lY|Z)m<#rQ4V{Twl4%>oSwu_I$vsIYQ*lAGLdC=u+z@d| z5f?PfObu~Kar?}#23ssOO#yMOTtESr)Lfy3DKi%|Lo^qxrqYreWgFx?j%wQ{$Ul78-#jNK6y9#&!k`Xih3pV;y*{CC3+)}^N#xIjHa3zgW9@$O3u^PZT;2m3o~ znEkJ;hfRd_a4CsuukIvWQ23>gQLAc9zTpLk;m=Ro*6A*M9u=wpc5ItM(|dIz0<}l% z&E5R)vZj+?67CRfZC3;#_rZc&rGjOmbv*OMR)I8p&?3}fOlLt$qE;Gcl(^ZXkEd(Y zAxX8mdCVjr$pN-_wWv!Im~7$Gv$6%e*L$5QYC*!U@NdUD%q5n+SE{<9?@%^%yz($R zR}LmCZ3~Dr9%_C(m5rQg*Z!H&Feb9`@(}=95%h4?ysh<|hdZ|1#zY{VzCT=agidV7 z2&$Q?SLauH7;#%;=wwuuVIRh<@>kEQz@7I2UHWC^BP%-0Q}bGTYO%bIn-eL(zg#+M zTs|g?m#qi8v&*?@Vv{Ucxxv4HLOeDS6V)ab3Je}uK8d12O5x|+Iue!i=}n_LLu?hX zI%5`JaDUZl21UxE@0H-W{cyugz5cC()?7>NnH8CshR$^>@;A4sEo4yzk@rgaTcat9Xg%S2HWvJ$i&d7npNPGWSQOWbn5`zh&@Q1@D4pJ5-~+=kEB_x}-kL1p;qaX9lafKDY5 z-VAJ;_=Y;+wC(Hz4`7yB7Ax(~_tBe@eR{$CG2nkW8zr_~u7U>tg!HO6QvqFTDzSDW z)d;^3#)+6qf^UQGWH#L<#-=-0TBwizamM9ytqy8h1aulCixuNs5ol}$uhC6epg?YV 
z)HOH6ZN^8_muy-W16Mc?gN-F3Uf(s=hopvkK<=$*=ZdL6Mw^43!fQslWl89#R~`C* z`2=f3m{qK0RF7-bfl%~Ii5x_ADrMEIdS|XTcT&EG?{>?b9?$pS za`wGa3C8)rmNiSCPy z;Q%{X4LT(Iv>#D%WVrqAM6A;-~FD~prLR0TmSIiTF)~Sx!|A& zkA+w5;^W_?Vz9Ivbp$4}cFf6djO(4%G~m3a1vDj}cSU8J8uK*J&}6G4{mw$nMAF5b zW|xg)R%g-;?+?#g_3%&O^oJT#3g9zAD^Gsx);iqeT0wk=cZ&Z4j_Bs<`!Q1hV2io(q_jKcK(3sk|J195eNrqb5w%;rS~Jat0kq^s2z&OE)v!k{U4oDax5& z-Ox|H+9D2Fn`H2MrIf*$gL-3n?m8cNXHBOt?#a6qtY28U1P%LM_k4PDHq~l#66CU_ zM+1G8B<%sZ)=-UyxPv+|S89oE_6kHlK;Aw&!RcpU{QQycpF|b!03ZI&hJDF5@d?G5 zkUy1g;8tH(zRwCNxw;n|Z_*ro$f_+}lf2{T^qe@q;I8C_0@j#{1_;>GYB%}-ze@?v8dM9$l{^TxR1>>VV9q~!WlwKw%AH`&7X81& zC6iSWNt3$QjmkFj9M@O_-U(akoJJ#1nNQvaiyUn#4_t!#2^GoO)#TTVRSq-L%qjOb zjMK%hVqNPy5>rY~(%b&@?H>ukGAmJ_G~1)ZavEyyOM!V3H^-+e#QLvGA)|orhdc*# z*CT&mI?i@yeFrXQ4`_7l+#-sY6ND$uAo9`un+6rJF&~b;8tH62y*;{1>Y&S=$sJ^; zgREG+jNcPx&#`vqRhv1w<>nRZ(o*K`i+7G`BIEs!Eaa$Gn6u$Wi8>b_6QqDENuh^d z5KqYL<`BEM>ngvN?b`eNaYEOti5$O4T~`4*c_ zP_8{jEtk(7;@Ke-YMtJEo>8?9qmetK3^O6|#HNTw^tS10dux{sEGTebSrjuz{)(vjC z$?(Xa*zG}V1!t>x1bn`d2APqQN-81K!RpS?IICYCIWjCSh?M2!EK63Dz zA=>*OyTQ7NS0j9@8BLD~fCr+YavjFs^MJyb8=Y>_-)4@M;Lgc`rdO!{a46b$y2F1} zI*NVM-Y5@0*-ya%truNr^U_N1dSWJs_X7C&!_d9D&n|!>g?vq}cjJ5sDM~#L>1Nai z8=^P0>Bsk|P$fsjF`;A?uG6kLI$OQ2fE!Nv=c#CZsi0#vs$??NYX$Wt$ydgxiZ5L? 
zH_0Z3^fJooDw=1c;2DaZ?5BhDPCWNUk43S59~#K3l`w*ajO^3-iAum6VP?NeZcwq2 zb~slgHBY8e6TJK_0fwPt>>zAB_}rb0MIytm@Z-oSiCT2&dgh5k59l>7k^U)HWo4<( z;?Xq?@)OgVM1GfETH$4WOG#l52-}y?5r-RR#QmU~oTMztXPiSngX5j@FhY6;H(;{_ z0dsg!xpk}c4b zz0VvVKMN$&-EoX8gML-GngBjlxvjo@YTQ0s_rbK@$MKle)3@2xOrBBBOFuITW#s0e z!!HL-XLHagC9pl?Rp^+J<>1@xcI>1u!2x@e`H=eB0aafZF%nnG2vr_kx$cB z0;5cf%Qi$5C0JHe$|M?4a!pGm(Cn#Wx!d+=?`XGYF<@|Ih(*&kH#%xTn}&S!Wn&_z zoh30^+=>XCM*P|O9iO70xvJUXvltpg=<|kN z_N|-un>lYf(=cM?DoamVgt9)G)6B5rHt zu^7(h%gP){z|j#d@}LoKrjmxO!Vww_`X^{Rha%M*o!|+8CHb{eDtcQA(AyyyDVm=S z990YWr$ircTIBG^-%gc5TE`(@&VLa63y4KKdpE1L9V2=zOSXM@-w4+#( z*RS$5e|OU}8MR`OqAfQ~*UW3Pr0UBQTiAgy&ZVC=N+y(gd@5K~lvoQ0Gs8JW)gg*h zk>uwu*qWXd9YneIK5XRuG4JN=g<=O-ZaM-+P42Q2v@mnl5H7yqZ%FwX7fMkxO{j4m z#2|4)Gq1L^fabE3QVzm(2fRuRSQ0wWu~s$o>wbGJf!xnmqk7qg$QY#t*_;xSQshR( zY4%{LNYb#1*{Ru9icE+*z7>D5;+5{6$@7{g^+>ySHE&%1kk;!4@jEu^ zv_|i7PELo`XlDWMPo`sgrDdKXb3Rf1UXf0A90BaCcxLQVGv0T<*hv&@)}2sp?AO}W ze1ffZgTLZ5WnEO?1A>1y{IEORm1^Mr4cIWb>U@ujLhbt>Kr$Q_|A@-JPQ z)I0Q<>Hl;RucRVJ0pHM>n!GI{3mDAc!-&cB$)sI{CR^S+vZ;2&)$P4&fXU%}F^99v^Ge)%Lt~%) zbpv82a<#Uc*d=i*kbZn00b2kqyCTi^ZNdGg_2dZkH>4gdiuk45IMasPWuINo*r-H${S{XzPE`3Lot>&0thVA^WR}7HYG1VS3hdV%M%kHg3Ha7Bti6QnwLwpAMUI3NI}6Wy zRi4Ml3ByYTk9sYR{vR<0YT2`E$4c@hREAak=LrH)@ZBuu}A{EI| z@=TNw?2%;#AseLra+7-GcRuSL59&WX?LX9nM;Ay{9It>Gq!o87kG?Q>X#fc$aOCvo zcHoPRadv0D4Qs*9{V+XLyb=@`d`H&K2R`D*J&@o!Fz^q5X=M0yRM5w&uYyfm;ZApJ zIeqwKoI2|DP>LfE5q z;Nux9Wp72X+|^y6mS^UC1nPLNemUW%v6wfJ%kl#m9oOHsrSBd0Z_aL-UEI&gyqj+P z10vm1iy;9zbwBBq-IG&5ie5>!k{CIBG=}W|Cu3YSL&&F4qf`+sy3#_2QpP z6AOTqs9;@%8Tln}Dfj&@(RKS=-H-(qrhJ4RL0VCf$%T-EI_I`s$i6NX&)R~kmMEsTsPVN2s%a#k?Y6^$#>7yED#e-WHiEwD zIjrr-^xE)^VxOP_J{kUDL5>k$_+=XPVbifor$^z)uutuLAH*1h&`4t!bzy#x@q0tH z9R)2Z%K3~blVK-bWW;o)*Ucf7juNkJfaPJiX$+5vB)|u$#tJ%`M_AX^RN80NqU7Ok zbteYwqR;dHKn@PWl9!=fZju8d4wnU+;DdL?2QpqGV|}xw3O#IFhP(n4fmUD6Mn~VG`l^IxvS)bM ze6P_PTs||Yn0)#8TXP3tF)VdJZD&r?SK(#Y$rpSDL|%J!&l*7U_Ud;P_MckE?dsf6X0e(YG9QY}UH={tg=ZkRKN5e$YQ5h7=(q2M29mBn 
zf{|-e-s8WGVV~4fAeY+UWvcfgenkptjkEL-=GV>d+MMss$~w*DP_92R&k~&qq7T{m zTe_B?fqFya!cMD5XQEa-!vpeQ9qP|NO;L4535Y zKCS%JQyezc31!y^Zm6Zw?JFmTE8kCo)K;`ZcxD;R*E(;G@jDnHX_RT*)gP) zdoxf5z9sFSMyawDe#dygBTqBn+?8RuHf$A<@VWl|2HF$JkcxF)eud@SRNVn4({dR9 zEK`7oG}ZItVcKpC1A`BhdSE6=X&Yjme@mYT8V_MHtlEM{U!-V#g-vufJNV=j#GMOn z$1M!X&+p@bXyey!HQ6oQ;T!a>LrQyPfe}e-$N~Ov`j4Ms0JBZgcm1*Xm??9MC1vIY z`*^tlWy7j5V8$wpd*X;#r-Qx7zFu-cObuGXOi1gmv+CiHXk8RBepUt!3FTd3G#}J* zHM`MVp%sWp4%qT{Gb$6+tq5e==bEj~{Qd4yL;Pa3-ttkO;>Mp7(&Y3Wxp?_z3f*af zV8iAPSOkOK@l-6Q!|s?$K+pWmhQV7_WOHIKasGS{YF}0=hX)Y#ENC9GSIO%&K&VYu zrxZ~epmUKh*K62U`r0q;I_Sn2a#>K=CZs1p4V+gD%`gP=B_X0DxFxtWRRn7FskTWT z?%yB9FO!W?adxY^PK3t)_UThk#JN-gjPo^-qMvME6*e1`W}|7~idhHilFro>xc5kr zrG1llHca6xyu4#9=cLG# Date: Wed, 14 May 2025 22:03:26 +0800 Subject: [PATCH 08/15] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E7=94=9F?= =?UTF-8?q?=E6=88=90=E7=9A=84=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/base/task/worker/generate_dataset_worker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index 6036afe..56a5c25 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -133,6 +133,8 @@ class GenerateDataSetWorker(BaseWorker): for i in range(len(doc_chunks)): doc_chunk = doc_chunks[i] for j in range(len(doc_chunk.chunks)): + if data_cnt == 0: + break chunk = doc_chunk.chunks[j].text if dataset_entity.is_chunk_related: if i > 0: @@ -143,6 +145,8 @@ class GenerateDataSetWorker(BaseWorker): if i == len(doc_chunks)-1 and j == len(doc_chunk.chunks)-1: qa_cnt = data_cnt qa_cnt = min(qa_cnt, data_cnt) + if qa_cnt == 0: + continue data_cnt -= qa_cnt data_cnt = max(data_cnt, 0) qs = [] -- Gitee From 84b3a8770bcaa3ea671cc96f15abdbda66fe7469 Mon Sep 17 00:00:00 2001 From: zxstty Date: 
Thu, 15 May 2025 14:51:50 +0800 Subject: [PATCH 09/15] =?UTF-8?q?=E5=AE=8C=E5=96=84=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=94=9F=E6=88=90=20=E5=AF=BC=E5=85=A5=20=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/base/convertor.py | 1 + .../base/task/worker/export_dataset_worker.py | 20 ++++--- .../task/worker/generate_dataset_worker.py | 59 ++++++++++++++----- .../worker/import_knowledge_base_worker.py | 4 +- data_chain/apps/router/acc_testing.py | 1 + data_chain/apps/router/dataset.py | 9 ++- data_chain/apps/router/task.py | 2 +- data_chain/apps/service/chunk_service.py | 3 + data_chain/apps/service/dataset_service.py | 49 ++++++++------- data_chain/apps/service/task_queue_service.py | 24 ++++++-- data_chain/apps/service/task_service.py | 2 +- data_chain/common/prompt.yaml | 2 +- data_chain/entities/request_data.py | 12 ++-- data_chain/entities/response_data.py | 3 +- data_chain/manager/chunk_manager.py | 10 +++- data_chain/manager/dataset_manager.py | 2 + data_chain/manager/qa_manager.py | 2 + data_chain/parser/handler/pptx_parser.py | 2 +- data_chain/parser/tools/ocr_tool.py | 6 -- data_chain/stores/database/database.py | 4 +- openapi.json | 2 +- openapi.json.1 | 1 + 22 files changed, 147 insertions(+), 73 deletions(-) create mode 100644 openapi.json.1 diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 3bcbee8..442b3f9 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -508,6 +508,7 @@ class Convertor: llm_id=req.llm_id, name=req.dataset_name, description=req.description, + data_cnt=req.data_cnt, is_data_cleared=req.is_data_cleared, is_chunk_related=req.is_chunk_related, ) diff --git a/data_chain/apps/base/task/worker/export_dataset_worker.py b/data_chain/apps/base/task/worker/export_dataset_worker.py index 4264165..94f71ee 100644 --- 
a/data_chain/apps/base/task/worker/export_dataset_worker.py +++ b/data_chain/apps/base/task/worker/export_dataset_worker.py @@ -94,8 +94,10 @@ class ExportDataSetWorker(BaseWorker): if os.path.exists(tmp_path): shutil.rmtree(tmp_path) os.makedirs(tmp_path) - source_path = os.path.join(EXPORT_DATASET_PATH_IN_OS, 'source') + source_path = os.path.join(tmp_path, 'source') + os.makedirs(source_path) target_path = os.path.join(tmp_path, 'zip') + os.makedirs(target_path) return tmp_path, source_path, target_path @staticmethod @@ -103,9 +105,9 @@ class ExportDataSetWorker(BaseWorker): task_id: uuid.UUID, dataset_entity: DataSetEntity, qa_entities: list[QAEntity], source_path: str, target_path: str) -> str: '''从文件中加载QA实体''' - json_path = os.path.join(source_path, f"{dataset_entity.name}.json") - yaml_path = os.path.join(source_path, f"{dataset_entity.name}.yaml") - xlsx_path = os.path.join(source_path, f"{dataset_entity.name}.xlsx") + json_path = os.path.join(source_path, f"{dataset_entity.id}.json") + yaml_path = os.path.join(source_path, f"{dataset_entity.id}.yaml") + xlsx_path = os.path.join(source_path, f"{dataset_entity.id}.xlsx") qa_dict = { 'question': [], 'answer': [], @@ -126,7 +128,7 @@ class ExportDataSetWorker(BaseWorker): 'chunk': qa_entity.chunk }) with open(json_path, 'w', encoding='utf-8') as f: - json.dump(qa_list, f, indent=4) + json.dump(qa_list, f, indent=4, ensure_ascii=False) with open(yaml_path, 'w', encoding='utf-8') as f: yaml.dump(qa_list, f, allow_unicode=True) zip_path = os.path.join(target_path, str(task_id)+'.zip') @@ -138,9 +140,9 @@ class ExportDataSetWorker(BaseWorker): task_id: uuid.UUID, zip_path: str) -> None: '''上传文件到minio''' await MinIO.put_object( - bucket_name=EXPORT_DATASET_PATH_IN_MINIO, - object_name=str(task_id), - file_path=zip_path + EXPORT_DATASET_PATH_IN_MINIO, + str(task_id), + zip_path ) @staticmethod @@ -202,6 +204,6 @@ class ExportDataSetWorker(BaseWorker): if task_entity.status == TaskStatus.CANCLED or 
TaskStatus.FAILED.value: await MinIO.delete_object( EXPORT_DATASET_PATH_IN_MINIO, - str(task_entity.op_id) + str(task_entity.id) ) return task_id diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index 56a5c25..603c62d 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -49,7 +49,9 @@ class GenerateDataSetWorker(BaseWorker): err = f"[GenerateDataSetWorker] 数据集不存在,数据集ID: {dataset_id}" logging.exception(err) return None + await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"score": -1}) dataset_entity = await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"status": DataSetStatus.PENDING.value}) + await QAManager.update_qa_by_dataset_id(dataset_id, {"status": QAStatus.DELETED.value}) task_entity = TaskEntity( team_id=dataset_entity.team_id, user_id=dataset_entity.author_id, @@ -69,7 +71,7 @@ class GenerateDataSetWorker(BaseWorker): err = f"[GenerateDataSetWorker] 任务不存在,task_id: {task_id}" logging.exception(err) return False - await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": 0}) + await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": -1}) await QAManager.update_qa_by_dataset_id(task_entity.op_id, {"status": QAStatus.DELETED.value}) if task_entity.retry < config['TASK_RETRY_TIME_LIMIT']: await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.PENDING.value}) @@ -95,7 +97,7 @@ class GenerateDataSetWorker(BaseWorker): dataset_doc_entities = await DatasetManager.list_dataset_document_by_dataset_id(dataset_entity.id) doc_chunks = [] for dataset_doc_entity in dataset_doc_entities: - chunk_entities = await ChunkManager.list_all_chunk_by_doc_id(dataset_doc_entity.id) + chunk_entities = await ChunkManager.list_all_chunk_by_doc_id(dataset_doc_entity.doc_id) chunks = [] for chunk_entity in 
chunk_entities: chunks.append(Chunk( @@ -137,10 +139,34 @@ class GenerateDataSetWorker(BaseWorker): break chunk = doc_chunk.chunks[j].text if dataset_entity.is_chunk_related: + l = i-1 + r = i+1 + tokens_sub = 0 + while TokenTool.get_tokens(chunk) < llm.max_tokens: + if l < 0 and r >= len(doc_chunks): + break + if tokens_sub > 0: + if l >= 0: + tokens_sub += TokenTool.get_tokens(doc_chunks[i].chunks[l].text) + chunk += doc_chunks[i].chunks[l].text + l -= 1 + else: + tokens_sub -= TokenTool.get_tokens(doc_chunks[i].chunks[r].text) + chunk += doc_chunks[i].chunks[r].text + r += 1 + else: + if r < len(doc_chunks): + tokens_sub += TokenTool.get_tokens(doc_chunks[i].chunks[r].text) + chunk += doc_chunks[i].chunks[r].text + r += 1 + else: + tokens_sub -= TokenTool.get_tokens(doc_chunks[i].chunks[l].text) + chunk += doc_chunks[i].chunks[l].text + l -= 1 if i > 0: - chunk = doc_chunk.chunks[i-1] + chunk + chunk = doc_chunk.chunks[i-1].text + chunk if i < len(doc_chunk.chunks) - 1: - chunk = chunk + doc_chunk.chunks[i+1] + chunk = chunk + doc_chunk.chunks[i+1].text qa_cnt = random.randint(0, 2*(division+(index <= remainder))) if i == len(doc_chunks)-1 and j == len(doc_chunk.chunks)-1: qa_cnt = data_cnt @@ -150,9 +176,10 @@ class GenerateDataSetWorker(BaseWorker): data_cnt -= qa_cnt data_cnt = max(data_cnt, 0) qs = [] - answer = [] + answers = [] rd = 5 - while len(qs) <= qa_cnt and rd > 0: + while len(qs) < qa_cnt and rd > 0: + rd -= 1 try: sys_call = q_generate_prompt_template.format( k=qa_cnt, @@ -165,6 +192,7 @@ class GenerateDataSetWorker(BaseWorker): err = f"[GenerateDataSetWorker] 生成问题失败,错误信息: {e}" logging.exception(err) continue + sub_answers = [] try: for q in sub_qs: sys_call = answer_generate_prompt_template.format( @@ -173,16 +201,18 @@ class GenerateDataSetWorker(BaseWorker): ) usr_call = '请输出答案' sub_answer = await llm.nostream([], sys_call, usr_call) - sub_answer = json.loads(sub_answer) + sub_answers.append(sub_answer) except Exception as e: err = 
f"[GenerateDataSetWorker] 生成答案失败,错误信息: {e}" logging.exception(err) continue - for q, answer in zip(sub_qs, sub_answer): + for q, answer in zip(sub_qs, sub_answers): + if len(qa_entities) + len(qs) >= dataset_entity.data_cnt: + break try: if dataset_entity.is_data_cleared: sys_call = cal_qa_score_prompt_template.format( - content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4), + fragment=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4), question=TokenTool.get_k_tokens_words_from_content(q, llm.max_tokens//9), answer=TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens//9*4) ) @@ -193,13 +223,13 @@ class GenerateDataSetWorker(BaseWorker): score = 100 if score > 60: qs.append(q) - answer.append(answer) + answers.append(answer) dataset_score += score except Exception as e: err = f"[GenerateDataSetWorker] 计算分数失败,错误信息: {e}" logging.exception(err) continue - for q, ans in zip(qs, answer): + for q, ans in zip(qs, answers): qa_entity = QAEntity( dataset_id=dataset_entity.id, doc_id=doc_chunk.doc_id, @@ -214,6 +244,7 @@ class GenerateDataSetWorker(BaseWorker): dataset_score = dataset_score / len(qa_entities) await DatasetManager.update_dataset_by_dataset_id( dataset_entity.id, {'score': dataset_score}) + return qa_entities @staticmethod async def add_qa_to_db(qa_entities: list[QAEntity]) -> None: @@ -221,7 +252,7 @@ class GenerateDataSetWorker(BaseWorker): index = 0 while index < len(qa_entities): try: - await QAManager.add_qa(qa_entities[index:index+1024]) + await QAManager.add_qas(qa_entities[index:index+1024]) except Exception as e: err = f"[GenerateDataSetWorker] 添加QA到数据库失败,错误信息: {e}" logging.exception(err) @@ -250,7 +281,7 @@ class GenerateDataSetWorker(BaseWorker): await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.GENERATING.value}) current_stage = 0 stage_cnt = 3 - doc_chunks = await GenerateDataSetWorker.get_chunks(dataset_entity.id) + doc_chunks = await 
GenerateDataSetWorker.get_chunks(dataset_entity) current_stage += 1 await GenerateDataSetWorker.report(task_id, "获取文档分块信息", current_stage, stage_cnt) qa_entities = await GenerateDataSetWorker.generate_qa( @@ -277,6 +308,6 @@ class GenerateDataSetWorker(BaseWorker): return None await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.IDLE.value}) if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: - await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": 0}) + await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"score": -1}) await QAManager.update_qa_by_dataset_id(task_entity.op_id, {"status": QAStatus.DELETED.value}) return task_id diff --git a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py index 670ae1b..ac69907 100644 --- a/data_chain/apps/base/task/worker/import_knowledge_base_worker.py +++ b/data_chain/apps/base/task/worker/import_knowledge_base_worker.py @@ -61,7 +61,7 @@ class ImportKnowledgeBaseWorker(BaseWorker): await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.PENDING.value}) return True else: - await MinIO.delete_object(IMPORT_KB_PATH_IN_OS, str(task_entity.op_id)) + await MinIO.delete_object(IMPORT_KB_PATH_IN_MINIO, str(task_entity.op_id)) await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.DELETED.value}) return False @@ -254,5 +254,5 @@ class ImportKnowledgeBaseWorker(BaseWorker): return None if task_entity.status == TaskStatus.CANCLED or TaskStatus.FAILED.value: await KnowledgeBaseManager.update_knowledge_base_by_kb_id(task_entity.op_id, {"status": KnowledgeBaseStatus.DELETED.value}) - await MinIO.delete_object(IMPORT_KB_PATH_IN_OS, str(task_entity.op_id)) + await 
MinIO.delete_object(IMPORT_KB_PATH_IN_MINIO, str(task_entity.op_id)) return task_id diff --git a/data_chain/apps/router/acc_testing.py b/data_chain/apps/router/acc_testing.py index e7a6012..46c68f2 100644 --- a/data_chain/apps/router/acc_testing.py +++ b/data_chain/apps/router/acc_testing.py @@ -33,6 +33,7 @@ async def list_testing_by_kb_id( action: Annotated[str, Depends(get_route_info)], req: Annotated[ListTestingRequest, Body()], ): + print(req.kb_id) if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): raise Exception("用户没有权限访问该知识库的测试") list_testing_msg = await TestingService.list_testing_by_kb_id(req) diff --git a/data_chain/apps/router/dataset.py b/data_chain/apps/router/dataset.py index 105b52f..dc5c3ea 100644 --- a/data_chain/apps/router/dataset.py +++ b/data_chain/apps/router/dataset.py @@ -54,6 +54,7 @@ async def list_data_in_dataset( if not (await DataSetService.validate_user_action_to_dataset(user_sub, req.dataset_id, action)): raise Exception("用户没有权限访问该数据集的数据") list_data_in_dataset_msg = await DataSetService.list_data_in_dataset(req) + print(list_data_in_dataset_msg) return ListDataInDatasetResponse(result=list_data_in_dataset_msg) @@ -103,15 +104,16 @@ async def create_dataset( ): if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): raise Exception("用户没有权限访问该知识库的数据集") - task_id = await DataSetService.create_dataset(req) + task_id = await DataSetService.create_dataset(user_sub, req) return CreateDatasetResponse(result=task_id) @router.post('/import', response_model=ImportDatasetResponse, dependencies=[Depends(verify_user)]) async def import_dataset(user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], kb_id: Annotated[UUID, Query(alias="kbId")], dataset_packages: list[UploadFile] = File(...)): - if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id)): + if not (await 
KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, kb_id, action)): raise Exception("用户没有权限在该知识库导入数据集") dataset_import_task_ids = await DataSetService.import_dataset(user_sub, kb_id, dataset_packages) return ImportDatasetResponse(result=dataset_import_task_ids) @@ -120,9 +122,10 @@ async def import_dataset(user_sub: Annotated[str, Depends(get_user_sub)], @router.post('/export', response_model=ExportDatasetResponse, dependencies=[Depends(verify_user)]) async def export_dataset_by_dataset_ids( user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], dataset_ids: Annotated[list[UUID], Query(alias="datasetIds")]): for dataset_id in dataset_ids: - if not (await DataSetService.validate_user_action_to_dataset(user_sub, dataset_id)): + if not (await DataSetService.validate_user_action_to_dataset(user_sub, dataset_id, action)): raise Exception("用户没有权限访问该数据集的数据") dataset_export_task_ids = await DataSetService.export_dataset(dataset_ids) return ExportDatasetResponse(result=dataset_export_task_ids) diff --git a/data_chain/apps/router/task.py b/data_chain/apps/router/task.py index 4ac458b..f24d959 100644 --- a/data_chain/apps/router/task.py +++ b/data_chain/apps/router/task.py @@ -53,5 +53,5 @@ async def delete_task_by_task_type( ): if not (await TeamService.validate_user_action_in_team(user_sub, team_id, action)): raise Exception("用户没有权限访问该团队的任务") - task_ids = await TaskService.delete_task_by_type(user_sub, team_id, task_type, task_ids) + task_ids = await TaskService.delete_task_by_type(user_sub, team_id, task_type) return DeleteTaskByTypeResponse() diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 9601197..620e7c3 100644 --- a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -33,6 +33,7 @@ from data_chain.entities.common import DOC_PATH_IN_OS, DOC_PATH_IN_MINIO, DEFAUL from data_chain.logger.logger import logger as 
logging from data_chain.rag.base_searcher import BaseSearcher from data_chain.parser.tools.token_tool import TokenTool +from data_chain.embedding.embedding import Embedding class ChunkService: @@ -130,6 +131,8 @@ class ChunkService: async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> uuid.UUID: try: chunk_dict = await Convertor.convert_update_chunk_request_to_dict(req) + vector = await Embedding.get_embedding(req.text) + chunk_dict["text_vector"] = vector chunk_entity = await ChunkManager.update_chunk_by_chunk_id(chunk_id, chunk_dict) return chunk_entity.id except Exception as e: diff --git a/data_chain/apps/service/dataset_service.py b/data_chain/apps/service/dataset_service.py index da397ec..0493045 100644 --- a/data_chain/apps/service/dataset_service.py +++ b/data_chain/apps/service/dataset_service.py @@ -29,7 +29,7 @@ from data_chain.manager.team_manager import TeamManager from data_chain.manager.role_manager import RoleManager from data_chain.stores.minio.minio import MinIO from data_chain.entities.enum import ParseMethod, DataSetStatus, DocumentStatus, TaskType, TaskStatus -from data_chain.entities.common import IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO +from data_chain.entities.common import IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO, EXPORT_DATASET_PATH_IN_MINIO from data_chain.stores.database.database import DataSetEntity from data_chain.logger.logger import logger as logging @@ -88,13 +88,17 @@ class DataSetService: task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids(task_ids) task_report_dict = {task_report_entity.task_id: task_report_entity for task_report_entity in task_report_entities} + datasets = [] for dataset_entity in dataset_entities: - task_entity = task_dict.get(dataset_entity.id, None) - task_report = task_report_dict.get(task_entity.id, None) if task_entity else None - dataset = await Convertor.convert_dataset_entity_to_dataset(dataset_entity, task_report) + 
dataset = await Convertor.convert_dataset_entity_to_dataset(dataset_entity) dataset.llm = llm - dataset.task = task_entity - return ListDatasetMsg(total=total, datasets=dataset_entities) + task_entity = task_dict.get(dataset_entity.id, None) + if task_entity: + task_report = task_report_dict.get(task_entity.id, None) if task_entity else None + task = await Convertor.convert_task_entity_to_task(task_entity, task_report) + dataset.generate_task = task + datasets.append(dataset) + return ListDatasetMsg(total=total, datasets=datasets) except Exception as e: err = "根据知识库ID列出数据集失败" logging.exception("[DataSetService] %s", err) @@ -133,7 +137,7 @@ class DataSetService: """生成数据集下载链接""" try: download_url = await MinIO.generate_download_link( - IMPORT_DATASET_PATH_IN_MINIO, + EXPORT_DATASET_PATH_IN_MINIO, str(task_id), ) return download_url @@ -151,7 +155,7 @@ class DataSetService: err = "知识库不存在" logging.exception("[DataSetService] %s", err) raise Exception(err) - dataset_entity = await Convertor.convert_create_dataset_request_to_dataset_entity(user_sub, kb_entity.id, req) + dataset_entity = await Convertor.convert_create_dataset_request_to_dataset_entity(user_sub, kb_entity.team_id, req) await DatasetManager.add_dataset(dataset_entity) dataset_doc_entities = [] for doc_id in req.document_ids: @@ -171,8 +175,8 @@ class DataSetService: user_sub: str, kb_id: uuid.UUID, dataset_packages: list[UploadFile] = File(...)) -> uuid.UUID: """导入数据集""" try: - if len(dataset_packages.filename) > 10: - err = "数据集包名称过长" + if len(dataset_packages) > 10: + err = "数据集包数量超过限制" logging.exception("[DataSetService] %s", err) raise Exception(err) dataset_package_sz = 0 @@ -212,6 +216,7 @@ class DataSetService: data_cnt=0, is_data_cleared=False, is_chunk_related=False, + is_imported=True, status=DataSetStatus.IDLE.value, score=-1 ) @@ -247,21 +252,21 @@ class DataSetService: @staticmethod async def export_dataset(dataset_ids: list[uuid.UUID]) -> uuid.UUID: """导出数据集""" - try: - 
dataset_export_task_ids = [] - for dataset_id in dataset_ids: - task_id = await TaskQueueService.init_task(TaskType.DATASET_EXPORT.value, dataset_entity.id) + dataset_export_task_ids = [] + for dataset_id in dataset_ids: + try: + task_id = await TaskQueueService.init_task(TaskType.DATASET_EXPORT.value, dataset_id) if task_id: dataset_export_task_ids.append(task_id) else: err = "初始化任务失败" - logging.exception("[DataSetService] %s", err) + logging.error("[DataSetService] %s", err) raise Exception(err) - return dataset_export_task_ids - except Exception as e: - err = "导出数据集失败" - logging.exception("[DataSetService] %s", err) - raise e + except Exception as e: + err = "导出数据集失败" + logging.error("[DataSetService] %s", err) + continue + return dataset_export_task_ids @staticmethod async def generate_dataset_by_id(dataset_id: uuid.UUID, generate: bool) -> uuid.UUID: @@ -269,6 +274,10 @@ class DataSetService: try: dataset_entity = await DatasetManager.get_dataset_by_dataset_id(dataset_id) if generate: + if dataset_entity.is_imported: + err = "数据集为导入数据集,不能重新生成" + logging.exception("[DataSetService] %s", err) + raise Exception(err) if dataset_entity.status == DataSetStatus.IDLE.value: task_id = await TaskQueueService.init_task(TaskType.DATASET_GENERATE.value, dataset_id) else: diff --git a/data_chain/apps/service/task_queue_service.py b/data_chain/apps/service/task_queue_service.py index 4257f52..19bd4bd 100644 --- a/data_chain/apps/service/task_queue_service.py +++ b/data_chain/apps/service/task_queue_service.py @@ -69,7 +69,7 @@ class TaskQueueService: """删除任务""" try: async with TaskQueueService.lock: - task_id = await BaseWorker.stop(task_id) + flag = await BaseWorker.stop(task_id) task_id = await BaseWorker.delete(task_id) return task_id except Exception as e: @@ -85,7 +85,11 @@ class TaskQueueService: task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.SUCCESS.value) if task is None: break - await BaseWorker.deinit(task.task_id) + try: + await 
BaseWorker.deinit(task.task_id) + except Exception as e: + err = f"[TaskQueueService] 处理成功任务失败 {e}" + logging.error(err) await TaskQueueManager.delete_task_by_id(task.task_id) @staticmethod @@ -96,7 +100,13 @@ class TaskQueueService: task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.FAILED.value) if task is None: break - flag = await BaseWorker.reinit(task.task_id) + try: + flag = await BaseWorker.reinit(task.task_id) + except Exception as e: + err = f"[TaskQueueService] 处理失败任务失败 {e}" + logging.error(err) + await TaskQueueManager.delete_task_by_id(task.task_id) + continue if flag: task = Task(_id=task.task_id, status=TaskStatus.PENDING.value) await TaskQueueManager.update_task_by_id(task.task_id, task) @@ -111,7 +121,13 @@ class TaskQueueService: task = await TaskQueueManager.get_oldest_tasks_by_status(TaskStatus.PENDING.value) if task is None: break - flag = await BaseWorker.run(task.task_id) + try: + flag = await BaseWorker.run(task.task_id) + except Exception as e: + err = f"[TaskQueueService] 处理待处理任务失败 {e}" + logging.error(err) + await TaskQueueManager.delete_task_by_id(task.task_id) + continue if not flag: break await TaskQueueManager.delete_task_by_id(task.task_id) diff --git a/data_chain/apps/service/task_service.py b/data_chain/apps/service/task_service.py index 7f88ee2..e8219df 100644 --- a/data_chain/apps/service/task_service.py +++ b/data_chain/apps/service/task_service.py @@ -66,7 +66,7 @@ class TaskService: err = "任务不存在" logging.exception("[TaskService] %s", err) raise Exception(err) - task_id = await TaskQueueService.stop_task(task_id) + task_id = await TaskQueueService.delete_task(task_id) return task_id except Exception as e: err = "删除任务失败" diff --git a/data_chain/common/prompt.yaml b/data_chain/common/prompt.yaml index c44be25..36b2230 100644 --- a/data_chain/common/prompt.yaml +++ b/data_chain/common/prompt.yaml @@ -347,4 +347,4 @@ CHUNK_QUERY_MATCH_PROMPT: '你是一个文本分析专家,你的任务是根 下面是给出的片段和问题: 片段:{chunk} 问题:{question} - " \ No newline 
at end of file + ' \ No newline at end of file diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index f83611d..1ce59d2 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -65,7 +65,7 @@ class DocumentType(BaseModel): class ListKnowledgeBaseRequest(BaseModel): - team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId") + team_id: uuid.UUID = Field(description="团队id", alias="teamId") kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId") kb_name: Optional[str] = Field(default=None, description="资产名称", alias="kbName") author_name: Optional[str] = Field(default=None, description="资产创建者", alias="authorName") @@ -99,7 +99,7 @@ class UpdateKnowledgeBaseRequest(BaseModel): class ListDocumentRequest(BaseModel): - kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId") + kb_id: uuid.UUID = Field(description="资产id", alias="kbId") doc_id: Optional[uuid.UUID] = Field(default=None, description="文档id", min_length=1, max_length=30, alias="docId") doc_name: Optional[str] = Field(default=None, description="文档名称", alias="docName") doc_type_id: Optional[uuid.UUID] = Field(default=None, description="文档类型id", alias="docTypeId") @@ -151,7 +151,7 @@ class SearchChunkRequest(BaseModel): class ListDatasetRequest(BaseModel): - kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId") + kb_id: uuid.UUID = Field(description="资产id", alias="kbId") dataset_id: Optional[uuid.UUID] = Field(default=None, description="数据集id", alias="datasetId") dataset_name: Optional[str] = Field(default=None, description="数据集名称", alias="datasetName") data_cnt_order: Optional[OrderType] = Field(default=OrderType.DESC, description="数据集数据数量", alias="dataCnt") @@ -197,7 +197,7 @@ class UpdateDataRequest(BaseModel): class ListTestingRequest(BaseModel): - kb_id: Optional[uuid.UUID] = Field(default=None, description="资产id", alias="kbId") 
+ kb_id: uuid.UUID = Field(description="资产id", alias="kbId") testing_id: Optional[uuid.UUID] = Field(default=None, description="测试id", alias="testingId") testing_name: Optional[str] = Field(default=None, description="测试名称", alias="testingName") llm_id: Optional[str] = Field(default=None, description="测试使用的大模型id", alias="llmId") @@ -230,7 +230,7 @@ class UpdateTestingRequest(BaseModel): class ListRoleRequest(BaseModel): - team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId") + team_id: uuid.UUID = Field(description="团队id", alias="teamId") role_id: Optional[uuid.UUID] = Field(default=None, description="角色id", alias="roleId") role_name: Optional[str] = Field(default=None, description="角色名称", alias="roleName") page: int = Field(default=1, description="页码") @@ -254,7 +254,7 @@ class ListUserRequest(BaseModel): class ListTaskRequest(BaseModel): - team_id: Optional[uuid.UUID] = Field(default=None, description="团队id", alias="teamId") + team_id: uuid.UUID = Field(description="团队id", alias="teamId") task_id: Optional[uuid.UUID] = Field(default=None, description="任务id", alias="taskId") task_type: Optional[TaskType] = Field(default=None, description="任务类型", alias="taskType") task_status: Optional[TaskStatus] = Field(default=None, description="任务状态", alias="taskStatus") diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 518ab4c..b953e99 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -332,6 +332,7 @@ class Dataset(BaseModel): data_cnt: int = Field(description="数据集数据数量", alias="dataCnt") is_data_cleared: bool = Field(default=False, description="数据集是否进行清洗", alias="isDataCleared") is_chunk_related: bool = Field(default=False, description="数据集进行上下文关联", alias="isChunkRelated") + is_imported: bool = Field(default=False, description="数据集是否导入", alias="isImported") llm: Optional[LLM] = Field(default=None, description="生成数据集使用的大模型信息", alias="llm") generate_task: 
Optional[Task] = Field(default=None, description="数据集生成任务", alias="generateTask") score: Optional[float] = Field(description="数据集评分", default=None) @@ -362,7 +363,7 @@ class Data(BaseModel): class ListDataInDatasetMsg(BaseModel): """GET /dataset/data 数据结构""" total: int = Field(default=0, description="总数") - datas: list[Data] = Field(default=[], description="数据列表", alias="data") + datas: list[Data] = Field(default=[], description="数据列表", alias="datas") class ListDataInDatasetResponse(ResponseData): diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index 0a43115..39e1afe 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -257,7 +257,7 @@ class ChunkManager(): logging.exception("[ChunkManager] %s", err) @staticmethod - async def update_chunk_by_chunk_id(chunk_id: uuid.UUID, chunk_dict: Dict[str, str]) -> bool: + async def update_chunk_by_chunk_id(chunk_id: uuid.UUID, chunk_dict: Dict[str, str]) -> ChunkEntity: """根据文档ID更新文档解析结果""" try: async with await DataBase.get_session() as session: @@ -268,7 +268,13 @@ class ChunkManager(): ) await session.execute(stmt) await session.commit() - return True + stmt = ( + select(ChunkEntity) + .where(ChunkEntity.id == chunk_id) + ) + result = await session.execute(stmt) + chunk_entity = result.scalars().first() + return chunk_entity except Exception as e: err = "根据文档ID更新文档解析结果失败" logging.exception("[ChunkManager] %s", err) diff --git a/data_chain/manager/dataset_manager.py b/data_chain/manager/dataset_manager.py index 121ea1d..a6cdd0c 100644 --- a/data_chain/manager/dataset_manager.py +++ b/data_chain/manager/dataset_manager.py @@ -108,6 +108,8 @@ class DatasetManager: count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + stmt = stmt.order_by(DataSetEntity.created_at.desc()) + stmt = 
stmt.order_by(DataSetEntity.id.desc()) result = await session.execute(stmt) dataset_entities = result.scalars().all() return total, dataset_entities diff --git a/data_chain/manager/qa_manager.py b/data_chain/manager/qa_manager.py index f743eb1..b671d13 100644 --- a/data_chain/manager/qa_manager.py +++ b/data_chain/manager/qa_manager.py @@ -88,6 +88,8 @@ class QAManager: count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) + stmt = stmt.order_by(QAEntity.created_at.desc()) + stmt = stmt.order_by(QAEntity.id.desc()) result = await session.execute(stmt) qa_entities = result.scalars().all() return total, qa_entities diff --git a/data_chain/parser/handler/pptx_parser.py b/data_chain/parser/handler/pptx_parser.py index 98d7e76..87fff25 100644 --- a/data_chain/parser/handler/pptx_parser.py +++ b/data_chain/parser/handler/pptx_parser.py @@ -17,7 +17,7 @@ class PptxParser(BaseParser): for row in table.rows: row_data = [] for cell in row.cells: - cell_text = ''.join([p.text for p in cell.paragraphs]) + cell_text = ''.join([p.text for p in cell.text_frame.paragraphs]) row_data.append(cell_text) table_data.append(row_data) return table_data diff --git a/data_chain/parser/tools/ocr_tool.py b/data_chain/parser/tools/ocr_tool.py index ac67a7e..1173963 100644 --- a/data_chain/parser/tools/ocr_tool.py +++ b/data_chain/parser/tools/ocr_tool.py @@ -89,9 +89,3 @@ class OcrTool: err = f"[OCRTool] 图片转文本失败 {e}" logging.exception(err) return '' - - -path = './test.jpeg' -image = np.array(Image.open(path)) -result = asyncio.run(OcrTool.ocr_from_image(image)) -print(result) diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index 0ec2347..dbeb4ab 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -22,6 +22,7 @@ from data_chain.entities.enum import (Tokenizer, 
ImageStatus, ChunkParseTopology, DataSetStatus, + QAStatus, TestingStatus, SearchMethod, TaskType, @@ -366,6 +367,7 @@ class DataSetEntity(Base): data_cnt = Column(Integer) # 数据集数据量 is_data_cleared = Column(Boolean, default=False) # 数据集是否清洗 is_chunk_related = Column(Boolean, default=False) # 数据集是否关联上下文 + is_imported = Column(Boolean, default=False) # 数据集是否导入 status = Column(String, default=DataSetStatus.IDLE) # 数据集状态 score = Column(Float) # 数据集得分 created_at = Column( @@ -409,7 +411,7 @@ class QAEntity(Base): answer = Column(String) # 数据的答案 chunk = Column(String) # 数据的片段 chunk_type = Column(String, default="未知片段类型") # 数据的片段类型 - status = Column(String) + status = Column(String, default=QAStatus.EXISTED.value) # 数据的状态 created_at = Column( TIMESTAMP(timezone=True), nullable=True, diff --git a/openapi.json b/openapi.json index e3d7b3d..e46fc67 100644 --- a/openapi.json +++ b/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/text":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk_text_put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List 
Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task 
Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing 
Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role 
Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg 
Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items
":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","descript
ion":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetNam
e":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档
列表数据结构","default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","descripti
on":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk__put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List 
Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task 
Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing 
Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role 
Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg 
Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items
":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","descript
ion":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"isImported":{"type":"boolean","title":"Isimported","description":"数据集是否导入","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datas
etId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档列表数据结构",
"default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码
","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file diff --git a/openapi.json.1 b/openapi.json.1 new file mode 100644 index 0000000..6cd4613 --- /dev/null +++ b/openapi.json.1 @@ -0,0 +1 @@ +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team 
Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team 
Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete 
Kb By Kb Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk__put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload 
Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc 
Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In 
Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have 
Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search 
Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create 
Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing 
Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg 
Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb 
Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Re
sult","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"isImported":{"type":"boolean","title":"Isimported","description":"数据集是否导入","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datas
etId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档列表数据结构",
"default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","def
ault":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file -- Gitee From f60b9ee131c92e038fc8aeaac08a2a5626367747 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 15:46:18 +0800 Subject: [PATCH 10/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=97=AE=E7=AD=94?= =?UTF-8?q?=E5=AF=B9=E6=95=B0=E6=8D=AE=E9=9B=86=E5=AF=BC=E5=85=A5=E7=9A=84?= =?UTF-8?q?bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/base/convertor.py | 3 +- .../base/task/worker/import_dataset_worker.py | 29 ++++++++++-------- data_chain/apps/router/document.py | 6 ++-- data_chain/apps/service/dataset_service.py | 27 ++++++++-------- data_chain/entities/enum.py | 2 ++ data_chain/manager/document_manager.py | 4 +-- data_chain/parser/handler/json_parser.py | 8 ++--- data_chain/parser/handler/yaml_parser.py | 7 +++-- openapi.json | 2 +- .../188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx | Bin 0 -> 5911 bytes .../188455f8-fce3-4f87-8f6f-59e266af7d91.yaml | 13 ++++++++ 11 files changed, 63 insertions(+), 38 deletions(-) create mode 100644 
witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx create mode 100644 witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 442b3f9..f5d1dc8 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -436,6 +436,7 @@ class Convertor: dataCnt=dataset_entity.data_cnt, isDataCleared=dataset_entity.is_data_cleared, isChunkRelated=dataset_entity.is_chunk_related, + isImported=dataset_entity.is_imported, score=dataset_entity.score, authorName=dataset_entity.author_name, status=dataset_entity.status, @@ -487,7 +488,7 @@ class Convertor: question=qa_entity.question, answer=qa_entity.answer, chunk=qa_entity.chunk, - chunkType=qa_entity.chunk_type, + chunkType=ChunkType(qa_entity.chunk_type), ) return data except Exception as e: diff --git a/data_chain/apps/base/task/worker/import_dataset_worker.py b/data_chain/apps/base/task/worker/import_dataset_worker.py index 3e63c7e..ee6200c 100644 --- a/data_chain/apps/base/task/worker/import_dataset_worker.py +++ b/data_chain/apps/base/task/worker/import_dataset_worker.py @@ -10,7 +10,7 @@ from data_chain.config.config import config from data_chain.logger.logger import logger as logging from data_chain.apps.base.task.worker.base_worker import BaseWorker from data_chain.llm.llm import LLM -from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, DocumentStatus, DataSetStatus, QAStatus +from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, DocumentStatus, DataSetStatus, QAStatus, ChunkType from data_chain.entities.common import DEFAULt_DOC_TYPE_ID, IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO from data_chain.parser.parse_result import ParseResult, ParseNode from data_chain.parser.tools.token_tool import TokenTool @@ -47,7 +47,7 @@ class 
ImportDataSetWorker(BaseWorker): user_id=dataset_entity.author_id, op_id=dataset_entity.id, op_name=dataset_entity.name, - type=TaskType.KB_IMPORT.value, + type=TaskType.DATASET_IMPORT.value, retry=0, status=TaskStatus.PENDING.value) task_entity = await TaskManager.add_task(task_entity) @@ -71,6 +71,10 @@ class ImportDataSetWorker(BaseWorker): return True else: await DatasetManager.update_dataset_by_dataset_id(task_entity.op_id, {"status": DataSetStatus.DELETED.value}) + await MinIO.delete_object( + IMPORT_DATASET_PATH_IN_MINIO, + str(task_entity.op_id) + ) return False @staticmethod @@ -118,14 +122,14 @@ class ImportDataSetWorker(BaseWorker): 'xlsx': XlsxParser } extension = None - for parser_name, parser in parsers: + for parser_name, parser in parsers.items(): try: parser_result = await parser.parser(file_path) extension = parser_name break except Exception as e: err = f"[GenerateDataSetWorker] 解析文件失败,文件路径: {file_path},错误信息: {e}" - logging.exception(err) + logging.error(err) if parser_result is None: err = f"[GenerateDataSetWorker] 解析文件失败,文件路径: {file_path},不支持的文件格式" logging.exception(err) @@ -153,12 +157,11 @@ class ImportDataSetWorker(BaseWorker): question=question, answer=answer, chunk=chunk, + chunk_type=ChunkType.UNKOWN.value ) qa_entities.append(qa_entity) elif extension == 'json' or extension == 'yaml': - nodes = parser_result.nodes - for node in nodes: - tmp_dict = node.content + for tmp_dict in parser_result.nodes[0].content: if 'question' not in tmp_dict or 'answer' not in tmp_dict or 'chunk' not in tmp_dict: err = f"[GenerateDataSetWorker] qa对提取失败,文件路径: {file_path},qa对格式不正确" logging.exception(err) @@ -173,6 +176,7 @@ class ImportDataSetWorker(BaseWorker): question=question, answer=answer, chunk=chunk, + chunk_type=ChunkType.UNKOWN.value ) qa_entities.append(qa_entity) qa_entities = qa_entities[:512] @@ -194,7 +198,7 @@ class ImportDataSetWorker(BaseWorker): question = qa_entity.question answer = qa_entity.answer sys_call = 
cal_qa_score_prompt_template.format( - content=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4), + fragment=TokenTool.get_k_tokens_words_from_content(chunk, llm.max_tokens//9*4), question=TokenTool.get_k_tokens_words_from_content(question, llm.max_tokens//9), answer=TokenTool.get_k_tokens_words_from_content(answer, llm.max_tokens//9*4) ) @@ -202,7 +206,8 @@ class ImportDataSetWorker(BaseWorker): score = await llm.nostream([], sys_call, usr_call) score = eval(score) databse_score += score - databse_score /= len(qa_entities) + if len(qa_entities) > 0: + databse_score /= len(qa_entities) await DatasetManager.update_dataset_by_dataset_id(dataset_id, {"score": databse_score}) @staticmethod @@ -227,14 +232,14 @@ class ImportDataSetWorker(BaseWorker): raise Exception(err) await DatasetManager.update_dataset_by_dataset_id(dataset_entity.id, {"status": DataSetStatus.IMPORTING.value}) current_stage = 0 - stage_cnt = 3 + stage_cnt = 4 tmp_path = await ImportDataSetWorker.init_path(task_id) current_stage += 1 await ImportDataSetWorker.report(task_id, "初始化路径", current_stage, stage_cnt) - await ImportDataSetWorker.download_file_from_minio(dataset_entity.id, tmp_path) + file_path = await ImportDataSetWorker.download_file_from_minio(dataset_entity.id, tmp_path) current_stage += 1 await ImportDataSetWorker.report(task_id, "下载文件", current_stage, stage_cnt) - qa_entities = await ImportDataSetWorker.load_qa_entity_from_file(dataset_entity.id, tmp_path) + qa_entities = await ImportDataSetWorker.load_qa_entity_from_file(dataset_entity.id, file_path) current_stage += 1 await ImportDataSetWorker.report(task_id, "加载qa实体", current_stage, stage_cnt) await ImportDataSetWorker.update_dataset_score(dataset_entity.id, qa_entities, llm) diff --git a/data_chain/apps/router/document.py b/data_chain/apps/router/document.py index 3494ea9..541eb2b 100644 --- a/data_chain/apps/router/document.py +++ b/data_chain/apps/router/document.py @@ -93,7 +93,7 @@ async def upload_docs( 
@router.post('/parse', response_model=ParseDocumentResponse, dependencies=[Depends(verify_user)]) async def parse_docuement_by_doc_ids( user_sub: Annotated[str, Depends(get_user_sub)], - doc_ids: Annotated[list[UUID], Query(alias="docIds")], + doc_ids: Annotated[list[UUID], Body(alias="docIds")], parse: Annotated[bool, Query()]): for doc_id in doc_ids: if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, "parse")): @@ -116,13 +116,13 @@ async def update_doc_by_doc_id( @router.put('/switch', response_model=UpdateDocumentResponse, dependencies=[Depends(verify_user)]) - @router.delete('', response_model=DeleteDocumentResponse, dependencies=[Depends(verify_user)]) async def delete_docs_by_ids( user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], doc_ids: Annotated[list[UUID], Body(alias="docIds")]): for doc_id in doc_ids: - if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, "delete")): + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, action)): raise Exception("用户没有权限删除该文档") await DocumentService.delete_docs_by_ids(doc_ids) return DeleteDocumentResponse(result=doc_ids) diff --git a/data_chain/apps/service/dataset_service.py b/data_chain/apps/service/dataset_service.py index 0493045..a0943a5 100644 --- a/data_chain/apps/service/dataset_service.py +++ b/data_chain/apps/service/dataset_service.py @@ -193,9 +193,13 @@ class DataSetService: raise Exception(err) dataset_import_task_ids = [] for dataset_package in dataset_packages: - tmp_path = os.join(IMPORT_DATASET_PATH_IN_OS, str(uuid.uuid4())) + id = uuid.uuid4() + tmp_path = os.path.join(IMPORT_DATASET_PATH_IN_OS, str(id)) + if os.path.exists(tmp_path): + os.remove(tmp_path) + os.makedirs(tmp_path, exist_ok=True) file_name = dataset_package.filename - file_path = os.join(tmp_path, file_name) + file_path = os.path.join(tmp_path, file_name) try: async with aiofiles.open(file_path, 'wb') 
as out_file: content = await dataset_package.read() @@ -204,8 +208,17 @@ class DataSetService: err = "保存数据集包失败" logging.exception("[DataSetService] %s", err) raise e + if not (await MinIO.put_object( + IMPORT_DATASET_PATH_IN_MINIO, + str(id), + file_path + )): + err = "上传数据集包失败" + logging.error("[DataSetService] %s", err) + continue try: dataset_entity = DataSetEntity( + id=id, team_id=kb_entity.team_id, kb_id=kb_entity.id, author_id=user_sub, @@ -226,16 +239,6 @@ class DataSetService: logging.exception("[DataSetService] %s", err) continue - try: - await MinIO.put_object( - bucket_name=IMPORT_DATASET_PATH_IN_MINIO, - object_name=dataset_entity.id, - file_path=file_path - ) - except Exception as e: - err = "上传数据集包到minio失败" - logging.exception("[DataSetService] %s", err) - continue task_id = await TaskQueueService.init_task(TaskType.DATASET_IMPORT.value, dataset_entity.id) if task_id: dataset_import_task_ids.append(task_id) diff --git a/data_chain/entities/enum.py b/data_chain/entities/enum.py index 00faf7e..f566e4f 100644 --- a/data_chain/entities/enum.py +++ b/data_chain/entities/enum.py @@ -106,6 +106,8 @@ class ChunkType(str, Enum): CODE = "code" LINK = "link" QA = "qa" + JSON = "json" + UNKOWN = "unknown" class ChunkParseTopology(str, Enum): diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index e36e544..77671b8 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -238,12 +238,12 @@ class DocumentManager(): and_(DocumentEntity.id.in_(doc_ids), DocumentEntity.status != DocumentStatus.DELETED.value) ).values(**doc_dict) - result = await session.execute(stmt) + await session.execute(stmt) await session.commit() stmt = select(DocumentEntity).where( DocumentEntity.id.in_(doc_ids) ) - document_entities = await session.execute(stmt) + result = await session.execute(stmt) document_entities = result.scalars().all() return document_entities except Exception as e: diff --git 
a/data_chain/parser/handler/json_parser.py b/data_chain/parser/handler/json_parser.py index f221e3f..0f08912 100644 --- a/data_chain/parser/handler/json_parser.py +++ b/data_chain/parser/handler/json_parser.py @@ -1,6 +1,6 @@ import json import uuid - +from data_chain.entities.enum import DocParseRelutTopology, ChunkType, ChunkParseTopology from data_chain.parser.parse_result import ParseNode, ParseResult from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging @@ -22,13 +22,13 @@ class JsonParser(BaseParser): node = ParseNode( id=uuid.uuid4(), lv=0, - parse_topology_type=None, + parse_topology_type=ChunkParseTopology.GERNERAL, content=data, - type=None, + type=ChunkType.JSON, link_nodes=[] ) parse_result = ParseResult( - parse_topology_type=None, + parse_topology_type=DocParseRelutTopology.LIST, nodes=[node] ) return parse_result diff --git a/data_chain/parser/handler/yaml_parser.py b/data_chain/parser/handler/yaml_parser.py index e422af4..35e1aa9 100644 --- a/data_chain/parser/handler/yaml_parser.py +++ b/data_chain/parser/handler/yaml_parser.py @@ -1,6 +1,7 @@ import yaml import uuid +from data_chain.entities.enum import DocParseRelutTopology, ChunkType, ChunkParseTopology from data_chain.parser.parse_result import ParseNode, ParseResult from data_chain.parser.handler.base_parser import BaseParser from data_chain.logger.logger import logger as logging @@ -21,13 +22,13 @@ class YamlParser(BaseParser): node = ParseNode( id=uuid.uuid4(), lv=0, - parse_topology_type=None, + parse_topology_type=ChunkParseTopology.GERNERAL, content=content, - type=None, + type=ChunkType.JSON, link_nodes=[] ) parse_result = ParseResult( - parse_topology_type=None, + parse_topology_type=DocParseRelutTopology.LIST, nodes=[node] ) return parse_result diff --git a/openapi.json b/openapi.json index e46fc67..0b27807 100644 --- a/openapi.json +++ b/openapi.json @@ -1 +1 @@ 
-{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team 
Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join 
Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create 
Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team 
Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/":{"put":{"tags":["Chunk"],"summary":"Update Chunk By 
Id","operationId":"update_chunk_by_id_chunk__put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By 
Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"docIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}},{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create 
Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import 
Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User 
Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing 
Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team 
Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role 
Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg 
Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items
":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","descript
ion":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"isImported":{"type":"boolean","title":"Isimported","description":"数据集是否导入","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datas
etId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"data":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Data","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"data":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档列表数据结构",
"default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码
","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/routes":{"get":{"summary":"Get All Routes","operationId":"get_all_routes_routes_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/team/list":{"post":{"tags":["Team"],"summary":"List Teams","operationId":"list_teams_team_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/usr":{"post":{"tags":["Team"],"summary":"List Team User By Team Id","operationId":"list_team_user_by_team_id_team_usr_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_usr_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamUserRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team User By Team Id And User Subs","operationId":"delete_team_user_by_team_id_and_user_subs_team_usr_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string"},"title":"Usersub"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/msg":{"post":{"tags":["Team"],"summary":"List Team Msg By Team Id","operationId":"list_team_msg_by_team_id_team_msg_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTeamMsgResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team":{"post":{"tags":["Team"],"summary":"Create Team","operationId":"create_team_team_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Team"],"summary":"Update Team By Team Id","operationId":"update_team_by_team_id_team_put","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Team"],"summary":"Delete Team By Team Id","operationId":"delete_team_by_team_id_team_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/invitation":{"post":{"tags":["Team"],"summary":"Invite Team User By User 
Sub","operationId":"invite_team_user_by_user_sub_team_invitation_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"userSubInvite","in":"query","required":true,"schema":{"type":"string","title":"Usersubinvite"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/InviteTeamUserResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/application":{"post":{"tags":["Team"],"summary":"Join Team","operationId":"join_team_team_application_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JoinTeamResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/team/author":{"put":{"tags":["Team"],"summary":"Update Team Author By Team Id","operationId":"update_team_author_by_team_id_team_author_put","parameters":[{"name":"recriverSub","in":"query","required":true,"schema":{"type":"string","title":"Recriversub"}},{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTeamAuthorResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb":{"get":{"tags":["Knowledge Base"],"summary":"List Kb By User 
Sub","operationId":"list_kb_by_user_sub_kb_get","parameters":[{"name":"kbName","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListAllKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["Knowledge Base"],"summary":"Create Kb","operationId":"create_kb_kb_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Knowledge Base"],"summary":"Update Kb By Kb Id","operationId":"update_kb_by_kb_id_kb_put","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Knowledge Base"],"summary":"Delete Kb By Kb 
Ids","operationId":"delete_kb_by_kb_ids_kb_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/team":{"post":{"tags":["Knowledge Base"],"summary":"List Kb By Team Id","operationId":"list_kb_by_team_id_kb_team_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/doc_type":{"get":{"tags":["Knowledge Base"],"summary":"List Doc Types By Kb Id","operationId":"list_doc_types_by_kb_id_kb_doc_type_get","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentTypesResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/download":{"get":{"tags":["Knowledge Base"],"summary":"Download Kb By Task Id","operationId":"download_kb_by_task_id_kb_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/import":{"post":{"tags":["Knowledge Base"],"summary":"Import Kbs","operationId":"import_kbs_kb_import_post","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_kbs_kb_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/kb/export":{"post":{"tags":["Knowledge Base"],"summary":"Export Kb By Kb Ids","operationId":"export_kb_by_kb_ids_kb_export_post","parameters":[{"name":"kbIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Kbids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportKnowledgeBaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/list":{"post":{"tags":["Chunk"],"summary":"List Chunks By Document Id","operationId":"list_chunks_by_document_id_chunk_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/search":{"post":{"tags":["Chunk"],"summary":"Search 
Chunks","operationId":"search_chunks_chunk_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SearchChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/":{"put":{"tags":["Chunk"],"summary":"Update Chunk By Id","operationId":"update_chunk_by_id_chunk__put","parameters":[{"name":"chunkId","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/chunk/switch":{"put":{"tags":["Chunk"],"summary":"Update Chunk Enabled By Id","operationId":"update_chunk_enabled_by_id_chunk_switch_put","parameters":[{"name":"enabled","in":"query","required":true,"schema":{"type":"boolean","title":"Enabled"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Chunkid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateChunkEnabledResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/list":{"post":{"tags":["Document"],"summary":"List 
Doc","operationId":"list_doc_doc_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/download":{"get":{"tags":["Document"],"summary":"Download Doc By Id","operationId":"download_doc_by_id_doc_download_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/report":{"get":{"tags":["Document"],"summary":"Get Doc Report","operationId":"get_doc_report_doc_report_get","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GetDocumentReportResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc":{"post":{"tags":["Document"],"summary":"Upload Docs","operationId":"upload_docs_doc_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_upload_docs_doc_post"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UploadDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Document"],"summary":"Update Doc By Doc Id","operationId":"update_doc_by_doc_id_doc_put","parameters":[{"name":"docId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Docid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/parse":{"post":{"tags":["Document"],"summary":"Parse Docuement By Doc Ids","operationId":"parse_docuement_by_doc_ids_doc_parse_post","parameters":[{"name":"parse","in":"query","required":true,"schema":{"type":"boolean","title":"Parse"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Docids"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ParseDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/doc/switch":{"put":{"tags":["Document"],"summary":"Delete Docs By Ids","operationId":"delete_docs_by_ids_doc_switch_put","requestBody":{"content":{"application/json":{"schema":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Docids"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDocumentResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health_check":{"get":{"tags":["Health check"],"summary":"Health Check","operationId":"health_check_health_check_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/dataset/list":{"post":{"tags":["Dataset"],"summary":"List Dataset By Kb Id","operationId":"list_dataset_by_kb_id_dataset_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/data":{"post":{"tags":["Dataset"],"summary":"List Data In Dataset","operationId":"list_data_in_dataset_dataset_data_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetRequest"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListDataInDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Data By Dataset Id","operationId":"update_data_by_dataset_id_dataset_data_put","parameters":[{"name":"dataId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Dataid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Data By Data Ids","operationId":"delete_data_by_data_ids_dataset_data_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Dataids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDataResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/testing/exist":{"get":{"tags":["Dataset"],"summary":"Is Dataset Have Testing","operationId":"is_dataset_have_testing_dataset_testing_exist_get","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IsDatasetHaveTestingResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/download":{"get":{"tags":["Dataset"],"summary":"Download Dataset By Task Id","operationId":"download_dataset_by_task_id_dataset_download_get","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset":{"post":{"tags":["Dataset"],"summary":"Create Dataset","operationId":"create_dataset_dataset_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Dataset"],"summary":"Update Dataset By Dataset Id","operationId":"update_dataset_by_dataset_id_dataset_put","parameters":[{"name":"databaseId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Databaseid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Dataset"],"summary":"Delete Dataset By Dataset 
Ids","operationId":"delete_dataset_by_dataset_ids_dataset_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Databaseid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/import":{"post":{"tags":["Dataset"],"summary":"Import Dataset","operationId":"import_dataset_dataset_import_post","parameters":[{"name":"kbId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Kbid"}}],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_import_dataset_dataset_import_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/export":{"post":{"tags":["Dataset"],"summary":"Export Dataset By Dataset Ids","operationId":"export_dataset_by_dataset_ids_dataset_export_post","parameters":[{"name":"datasetIds","in":"query","required":true,"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Datasetids"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExportDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/dataset/generate":{"post":{"tags":["Dataset"],"summary":"Generate Dataset By 
Id","operationId":"generate_dataset_by_id_dataset_generate_post","parameters":[{"name":"datasetId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Datasetid"}},{"name":"generate","in":"query","required":true,"schema":{"type":"boolean","title":"Generate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateDatasetResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/other/llm":{"get":{"tags":["Other"],"summary":"List Llms By User Sub","operationId":"list_llms_by_user_sub_other_llm_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListLLMResponse"}}}}}}},"/other/embedding":{"get":{"tags":["Other"],"summary":"List Embeddings","operationId":"list_embeddings_other_embedding_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListEmbeddingResponse"}}}}}}},"/other/tokenizer":{"get":{"tags":["Other"],"summary":"List Tokenizers","operationId":"list_tokenizers_other_tokenizer_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTokenizerResponse"}}}}}}},"/other/parse_method":{"get":{"tags":["Other"],"summary":"List Parse Method","operationId":"list_parse_method_other_parse_method_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListParseMethodResponse"}}}}}}},"/other/search_method":{"get":{"tags":["Other"],"summary":"List Search Method","operationId":"list_search_method_other_search_method_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListSearchMethodResponse"}}}}}}},"/testing/list":{"post":{"tags":["Testing"],"summary":"List Testing By Kb Id","operationId":"list_testing_by_kb_id_testing_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/testcase":{"post":{"tags":["Testing"],"summary":"List Testcase By Testing Id","operationId":"list_testcase_by_testing_id_testing_testcase_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTestCaseResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/download":{"get":{"tags":["Testing"],"summary":"Download Testing Report By Testing Id","operationId":"download_testing_report_by_testing_id_testing_download_get","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing":{"post":{"tags":["Testing"],"summary":"Create 
Testing","operationId":"create_testing_testing_post","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateTestingResponsing"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Testing"],"summary":"Update Testing By Testing Id","operationId":"update_testing_by_testing_id_testing_put","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Testing"],"summary":"Delete Testing By Testing Ids","operationId":"delete_testing_by_testing_ids_testing_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Testingids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/testing/run":{"post":{"tags":["Testing"],"summary":"Run Testing By Testing 
Id","operationId":"run_testing_by_testing_id_testing_run_post","parameters":[{"name":"testingId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Testingid"}},{"name":"run","in":"query","required":true,"schema":{"type":"boolean","title":"Run"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/RunTestingResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role/action":{"get":{"tags":["Role"],"summary":"List Actions","operationId":"list_actions_role_action_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListActionResponse"}}}}}}},"/role/list":{"post":{"tags":["Role"],"summary":"List Role By Team Id","operationId":"list_role_by_team_id_role_list_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/role":{"post":{"tags":["Role"],"summary":"Create Role","operationId":"create_role_role_post","parameters":[{"name":"TeamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateRoleResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["Role"],"summary":"Update Role By Role Id","operationId":"update_role_by_role_id_role_put","parameters":[{"name":"roleId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Roleid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["Role"],"summary":"Delete Role By Role Ids","operationId":"delete_role_by_role_ids_role_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Roleid"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteRoleResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg/list":{"post":{"tags":["User Message"],"summary":"List User Msgs By User Sub","operationId":"list_user_msgs_by_user_sub_usr_msg_list_post","parameters":[{"name":"msgType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserMessageType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/usr_msg":{"put":{"tags":["User Message"],"summary":"Update User Msg By Msg 
Id","operationId":"update_user_msg_by_msg_id_usr_msg_put","parameters":[{"name":"msgId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Msgid"}},{"name":"msgStatus","in":"query","required":true,"schema":{"$ref":"#/components/schemas/UserStatus"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["User Message"],"summary":"Delete User Msg By Msg Ids","operationId":"delete_user_msg_by_msg_ids_usr_msg_delete","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"array","items":{"type":"string","format":"uuid"},"title":"Msgids"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteUserMessageResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task":{"post":{"tags":["Task"],"summary":"List Task","operationId":"list_task_task_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListTaskResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/one":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Id","operationId":"delete_task_by_task_id_task_one_delete","parameters":[{"name":"taskId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Taskid"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByIdResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/task/all":{"delete":{"tags":["Task"],"summary":"Delete Task By Task Type","operationId":"delete_task_by_task_type_task_all_delete","parameters":[{"name":"teamId","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Teamid"}},{"name":"taskType","in":"query","required":true,"schema":{"$ref":"#/components/schemas/TaskType"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteTaskByTypeResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ActionType":{"type":"string","enum":["team","user","role","knowledge_base","document","chunk","dataset","testing","dataset_data","task"],"title":"ActionType","description":"操作类型"},"Body_import_dataset_dataset_import_post":{"properties":{"dataset_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Dataset Packages"}},"type":"object","required":["dataset_packages"],"title":"Body_import_dataset_dataset_import_post"},"Body_import_kbs_kb_import_post":{"properties":{"kb_packages":{"items":{"type":"string","format":"binary"},"type":"array","title":"Kb 
Packages"}},"type":"object","required":["kb_packages"],"title":"Body_import_kbs_kb_import_post"},"Body_upload_docs_doc_post":{"properties":{"docs":{"items":{"type":"string","format":"binary"},"type":"array","title":"Docs"}},"type":"object","required":["docs"],"title":"Body_upload_docs_doc_post"},"Chunk":{"properties":{"chunkId":{"type":"string","format":"uuid","title":"Chunkid","description":"分片ID"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"分片类型"},"text":{"type":"string","title":"Text","description":"分片文本"}},"type":"object","required":["chunkId","chunkType","text"],"title":"Chunk","description":"文档分片信息"},"ChunkType":{"type":"string","enum":["text","table","image","code","link","qa","json","unknown"],"title":"ChunkType","description":"分块类型"},"CreateDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""},"documentIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Documentids","description":"测试数据集关联的文档","default":[]},"dataCnt":{"type":"integer","title":"Datacnt","description":"测试数据集内的数据数量","default":64,"max":512,"min":1},"llmId":{"type":"string","title":"Llmid","description":"测试数据集使用的大模型id"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"测试数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"测试数据集进行上下文关联","default":false}},"type":"object","required":["kbId","llmId"],"title":"CreateDatasetRequest"},"CreateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"nu
ll"}],"title":"Result","description":"数据集生成任务ID"}},"type":"object","title":"CreateDatasetResponse","description":"POST /dataset 响应"},"CreateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":20,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"知识库使用的embedding模型","default":""},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"CreateKnowledgeBaseRequest"},"CreateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"CreateKnowledgeBaseResponse","description":"POST /kb 
响应"},"CreateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"CreateRoleRequest"},"CreateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"CreateRoleResponse","description":"POST /role 响应"},"CreateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"CreateTeamRequest"},"CreateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"CreateTeamResponse","description":"POST /team 
响应"},"CreateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":30,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"测试数据集id"},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["datasetId","llmId"],"title":"CreateTestingRequest"},"CreateTestingResponsing":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"CreateTestingResponsing","description":"POST /testing 
响应"},"Data":{"properties":{"dataId":{"type":"string","format":"uuid","title":"Dataid","description":"数据ID"},"docName":{"type":"string","title":"Docname","description":"数据关联的文档名称"},"question":{"type":"string","title":"Question","description":"数据的问题"},"answer":{"type":"string","title":"Answer","description":"数据的答案"},"chunk":{"type":"string","title":"Chunk","description":"数据的片段"},"chunkType":{"allOf":[{"$ref":"#/components/schemas/ChunkType"}],"description":"数据的片段类型"}},"type":"object","required":["dataId","docName","question","answer","chunk","chunkType"],"title":"Data"},"DataSetStatus":{"type":"string","enum":["idle","pending","generating","exporting","importing","deleted"],"title":"DataSetStatus","description":"数据集状态"},"Dataset":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"数据集描述","max":150},"dataCnt":{"type":"integer","title":"Datacnt","description":"数据集数据数量"},"isDataCleared":{"type":"boolean","title":"Isdatacleared","description":"数据集是否进行清洗","default":false},"isChunkRelated":{"type":"boolean","title":"Ischunkrelated","description":"数据集进行上下文关联","default":false},"isImported":{"type":"boolean","title":"Isimported","description":"数据集是否导入","default":false},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"生成数据集使用的大模型信息"},"generateTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"数据集生成任务"},"score":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Score","description":"数据集评分"},"authorName":{"type":"string","title":"Authorname","description":"数据集创建者的用户名"},"status":{"$ref":"#/components/schemas/DataSetStatus"}},"type":"object","required":["datasetId","datasetName","description","dataCnt","authorName","status"],"title":"Dataset","description":"数据集信息"},"DatasetTesting":{"properties":{"datas
etId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集ID"},"datasetName":{"type":"string","title":"Datasetname","description":"数据集名称"},"testings":{"items":{"$ref":"#/components/schemas/Testing"},"type":"array","title":"Testings","description":"测试列表","default":[]}},"type":"object","required":["datasetId","datasetName"],"title":"DatasetTesting","description":"数据集测试信息"},"DeleteDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据ID列表","default":[]}},"type":"object","title":"DeleteDataResponse","description":"DELETE /dataset/data 响应"},"DeleteDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"数据集ID列表","default":[]}},"type":"object","title":"DeleteDatasetResponse","description":"DELETE /dataset 响应"},"DeleteDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"DeleteDocumentResponse","description":"DELETE /doc 响应"},"DeleteKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"知识库ID列表","default":[]}},"type":"object","title":"DeleteKnowledgeBaseResponse","description":"DELETE /kb 
响应"},"DeleteRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"角色ID列表","default":[]}},"type":"object","title":"DeleteRoleResponse","description":"DELETE /role 响应"},"DeleteTaskByIdResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"任务ID"}},"type":"object","title":"DeleteTaskByIdResponse","description":"DELETE /task/one 响应"},"DeleteTaskByTypeResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"DeleteTaskByTypeResponse","description":"DELETE /task/all 响应"},"DeleteTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"DeleteTeamResponse","description":"DELETE /team 响应"},"DeleteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"团队成员ID列表","default":[]}},"type":"object","title":"DeleteTeamUserResponse","description":"DELETE /team/usr 
响应"},"DeleteTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"测试ID列表","default":[]}},"type":"object","title":"DeleteTestingResponse","description":"DELETE /testing 响应"},"DeleteUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"消息ID列表","default":[]}},"type":"object","title":"DeleteUserMessageResponse","description":"DELETE /usr_msg 响应"},"DocChunk":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","required":["docId","docName"],"title":"DocChunk","description":"Post /chunk/search 
数据结构"},"Document":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档ID"},"docName":{"type":"string","title":"Docname","description":"文档名称"},"docType":{"allOf":[{"$ref":"#/components/schemas/DocumentType-Output"}],"description":"文档类型"},"chunkSize":{"type":"integer","title":"Chunksize","description":"文档分片大小"},"createdTime":{"type":"string","title":"Createdtime","description":"文档创建时间"},"docTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"文档任务"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"文档解析方法"},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用"},"authorName":{"type":"string","title":"Authorname","description":"文档创建者的用户名"},"status":{"$ref":"#/components/schemas/DocumentStatus"}},"type":"object","required":["docId","docName","docType","chunkSize","createdTime","parseMethod","enabled","authorName","status"],"title":"Document","description":"文档信息"},"DocumentStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"DocumentStatus","description":"文档状态"},"DocumentType-Input":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"docTypeName":{"type":"string","maxLength":20,"minLength":1,"title":"Doctypename","default":"这是一个默认的文档类型名称"}},"type":"object","title":"DocumentType"},"DocumentType-Output":{"properties":{"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型ID"},"docTypeName":{"type":"string","title":"Doctypename","description":"文档类型名称"}},"type":"object","required":["docTypeId","docTypeName"],"title":"DocumentType","description":"文档类型信息"},"ExportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":
"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ExportDatasetResponse","description":"POST /dataset/export 响应"},"ExportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ExportKnowledgeBaseResponse","description":"POST /kb/export 响应"},"GenerateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"GenerateDatasetResponse","description":"POST /dataset/generate 响应"},"GetDocumentReportResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","title":"Result","description":"文档报告数据结构","default":""}},"type":"object","title":"GetDocumentReportResponse","description":"GET /doc/report 响应"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImportDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID列表","default":[]}},"type":"object","title":"ImportDatasetResponse","description":"POST /dataset/import 
响应"},"ImportKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"任务ID","default":[]}},"type":"object","title":"ImportKnowledgeBaseResponse","description":"POST /kb/import 响应"},"InviteTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"邀请ID"}},"type":"object","title":"InviteTeamUserResponse","description":"POST /team/invitation 响应"},"IsDatasetHaveTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"boolean","title":"Result","description":"数据集是否有测试任务","default":false}},"type":"object","title":"IsDatasetHaveTestingResponse","description":"GET /dataset/testing/exist 响应"},"JoinTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"申请ID"}},"type":"object","title":"JoinTeamResponse","description":"POST /team/application 
响应"},"Knowledgebase":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"知识库ID"},"kbName":{"type":"string","title":"Kbname","description":"知识库名称","max":20,"min":1},"authorName":{"type":"string","title":"Authorname","description":"知识库创建者的用户名"},"tokenizer":{"$ref":"#/components/schemas/Tokenizer"},"embeddingModel":{"type":"string","title":"Embeddingmodel","description":"嵌入模型"},"description":{"type":"string","title":"Description","description":"知识库描述","max":150},"docCnt":{"type":"integer","title":"Doccnt","description":"知识库文档数量"},"docSize":{"type":"integer","title":"Docsize","description":"知识库文档大小"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库单次文件上传数量限制"},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库单次文件上传大小限制"},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"默认解析方法"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"默认分块大小"},"createdTime":{"type":"string","title":"Createdtime","description":"知识库创建时间"},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Doctypes","description":"知识库文档类型列表","default":[]}},"type":"object","required":["kbId","kbName","authorName","tokenizer","embeddingModel","description","docCnt","docSize","uploadCountLimit","uploadSizeLimit","defaultParseMethod","defaultChunkSize","createdTime"],"title":"Knowledgebase","description":"知识库信息"},"LLM":{"properties":{"llmId":{"type":"string","title":"Llmid","description":"大模型ID"},"llmName":{"type":"string","title":"Llmname","description":"大模型名称","max":20,"min":1},"llmIcon":{"type":"string","title":"Llmicon","description":"大模型图标"}},"type":"object","required":["llmId","llmName","llmIcon"],"title":"LLM"},"ListActionMsg":{"properties":{"actionTypes":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Actiontypes","description":"操作类型列表","default":[]}},"type":"object","titl
e":"ListActionMsg","description":"GET /role/action 数据结构"},"ListActionResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListActionMsg"}],"description":"操作列表数据结构","default":{"actionTypes":[]}}},"type":"object","title":"ListActionResponse"},"ListAllKnowledgeBaseMsg":{"properties":{"teamKnowledgebases":{"items":{"$ref":"#/components/schemas/TeamKnowledgebase"},"type":"array","title":"Teamknowledgebases","description":"团队知识库列表","default":[]}},"type":"object","title":"ListAllKnowledgeBaseMsg","description":"GET /kb 数据结构"},"ListAllKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListAllKnowledgeBaseMsg"}],"description":"团队知识库列表数据结构","default":{"teamKnowledgebases":[]}}},"type":"object","title":"ListAllKnowledgeBaseResponse","description":"GET /kb 响应"},"ListChunkMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"chunks":{"items":{"$ref":"#/components/schemas/Chunk"},"type":"array","title":"Chunks","description":"分片列表","default":[]}},"type":"object","title":"ListChunkMsg","description":"GET /chunk 
数据结构"},"ListChunkRequest":{"properties":{"docId":{"type":"string","format":"uuid","title":"Docid","description":"文档id"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text","description":"分块文本内容"},"types":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkType"},"type":"array"},{"type":"null"}],"title":"Types","description":"分块类型"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["docId"],"title":"ListChunkRequest"},"ListChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListChunkMsg"}],"description":"分片列表数据结构","default":{"total":0,"chunks":[]}}},"type":"object","title":"ListChunkResponse","description":"GET /chunk 响应"},"ListDataInDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datas":{"items":{"$ref":"#/components/schemas/Data"},"type":"array","title":"Datas","description":"数据列表","default":[]}},"type":"object","title":"ListDataInDatasetMsg","description":"GET /dataset/data 
数据结构"},"ListDataInDatasetRequest":{"properties":{"datasetId":{"type":"string","format":"uuid","title":"Datasetid","description":"数据集id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["datasetId"],"title":"ListDataInDatasetRequest"},"ListDataInDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDataInDatasetMsg"}],"description":"数据列表数据结构","default":{"total":0,"datas":[]}}},"type":"object","title":"ListDataInDatasetResponse","description":"GET /dataset/data 响应"},"ListDatasetMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasets":{"items":{"$ref":"#/components/schemas/Dataset"},"type":"array","title":"Datasets","description":"数据集列表","default":[]}},"type":"object","title":"ListDatasetMsg","description":"GET /dataset 
数据结构"},"ListDatasetRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"datasetId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Datasetid","description":"数据集id"},"datasetName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Datasetname","description":"数据集名称"},"dataCnt":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集数据数量","default":"desc"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"数据集使用的大模型id"},"isDataCleared":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Isdatacleared","description":"数据集是否清洗"},"isChunkRelated":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Ischunkrelated","description":"数据集是否上下文关联"},"generateStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Generatestatus","description":"数据集生成状态"},"scoreOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"数据集评分的排序方法","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"数据集创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDatasetRequest"},"ListDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDatasetMsg"}],"description":"数据集列表数据结构","default":{"total":0,"datasets":[]}}},"type":"object","title":"ListDatasetResponse","description":"GET /dataset 
响应"},"ListDocumentMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"documents":{"items":{"$ref":"#/components/schemas/Document"},"type":"array","title":"Documents","description":"文档列表","default":[]}},"type":"object","title":"ListDocumentMsg","description":"GET /doc 数据结构"},"ListDocumentRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"docId":{"anyOf":[{"type":"string","maxLength":30,"minLength":1,"format":"uuid"},{"type":"null"}],"title":"Docid","description":"文档id"},"docName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Docname","description":"文档名称"},"docTypeId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Doctypeid","description":"文档类型id"},"parseStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Parsestatus","description":"文档解析状态"},"parseMethod":{"anyOf":[{"$ref":"#/components/schemas/ParseMethod"},{"type":"null"}],"description":"文档解析方法"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"文档创建者"},"createdTimeStart":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimestart","description":"文档创建时间开始"},"createdTimeEnd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Createdtimeend","description":"文档创建时间结束"},"createdTimeOrder":{"allOf":[{"$ref":"#/components/schemas/OrderType"}],"description":"文档创建时间排序","default":"desc"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListDocumentRequest"},"ListDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListDocumentMsg"}],"description":"文档列表数据结构",
"default":{"total":0,"documents":[]}}},"type":"object","title":"ListDocumentResponse","description":"GET /doc 响应"},"ListDocumentTypesResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"$ref":"#/components/schemas/DocumentType-Output"},"type":"array","title":"Result","description":"文档类型列表数据结构","default":[]}},"type":"object","title":"ListDocumentTypesResponse","description":"GET /kb/doc_type 响应"},"ListEmbeddingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"向量化模型的列表数据结构","default":[]}},"type":"object","title":"ListEmbeddingResponse","description":"GET /other/embedding 数据结构"},"ListKnowledgeBaseMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表数据结构","default":[]}},"type":"object","title":"ListKnowledgeBaseMsg"},"ListKnowledgeBaseRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"kbId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Kbid","description":"资产id"},"kbName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kbname","description":"资产名称"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"资产创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListKnowledgeBaseRequest"},"ListKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","def
ault":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListKnowledgeBaseMsg"}],"default":{"total":0,"kbList":[]}}},"type":"object","title":"ListKnowledgeBaseResponse","description":"GET /kb/team 响应"},"ListLLMMsg":{"properties":{"llms":{"items":{"$ref":"#/components/schemas/LLM"},"type":"array","title":"Llms","description":"大模型列表","default":[]}},"type":"object","title":"ListLLMMsg","description":"GET /other/llm 数据结构"},"ListLLMResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListLLMMsg"}],"description":"大模型列表数据结构","default":{"llms":[]}}},"type":"object","title":"ListLLMResponse","description":"GET /other/llm 响应"},"ListParseMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"解析方法的列表数据结构","default":[]}},"type":"object","title":"ListParseMethodResponse","description":"\"GET /other/parse_method 响应"},"ListRoleMsg":{"properties":{"roles":{"items":{"$ref":"#/components/schemas/role"},"type":"array","title":"Roles","description":"角色列表","default":[]}},"type":"object","title":"ListRoleMsg","description":"GET /role 
数据结构"},"ListRoleRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"roleId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Roleid","description":"角色id"},"roleName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rolename","description":"角色名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListRoleRequest"},"ListRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListRoleMsg"}],"description":"角色列表数据结构","default":{"roles":[]}}},"type":"object","title":"ListRoleResponse","description":"GET /role 响应"},"ListSearchMethodResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"搜索方法的列表数据结构","default":[]}},"type":"object","title":"ListSearchMethodResponse","description":"GET /other/search_method 响应"},"ListTaskMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"tasks":{"items":{"$ref":"#/components/schemas/Task"},"type":"array","title":"Tasks","description":"任务列表","default":[]}},"type":"object","title":"ListTaskMsg","description":"GET /task 
数据结构"},"ListTaskRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队id"},"taskId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Taskid","description":"任务id"},"taskType":{"anyOf":[{"$ref":"#/components/schemas/TaskType"},{"type":"null"}],"description":"任务类型"},"taskStatus":{"anyOf":[{"$ref":"#/components/schemas/TaskStatus"},{"type":"null"}],"description":"任务状态"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTaskRequest"},"ListTaskResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTaskMsg"}],"description":"任务列表数据结构","default":{"total":0,"tasks":[]}}},"type":"object","title":"ListTaskResponse","description":"GET /task 响应"},"ListTeamMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teams":{"items":{"$ref":"#/components/schemas/Team"},"type":"array","title":"Teams","description":"团队列表","default":[]}},"type":"object","title":"ListTeamMsg","description":"GET /team 数据结构"},"ListTeamMsgMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamMsgs":{"items":{"$ref":"#/components/schemas/TeamMsg"},"type":"array","title":"Teammsgs","description":"团队消息列表","default":[]}},"type":"object","title":"ListTeamMsgMsg","description":"GET /team/msg 
数据结构"},"ListTeamMsgRequest":{"properties":{"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamMsgRequest"},"ListTeamMsgResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsgMsg"}],"description":"团队消息列表数据结构","default":{"total":0,"teamMsgs":[]}}},"type":"object","title":"ListTeamMsgResponse"},"ListTeamRequest":{"properties":{"teamType":{"allOf":[{"$ref":"#/components/schemas/TeamType"}],"description":"团队类型","default":"all"},"teamId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Teamid","description":"团队id"},"teamName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Teamname","description":"团队名称"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","title":"ListTeamRequest"},"ListTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamMsg"}],"description":"团队列表数据结构","default":{"total":0,"teams":[]}}},"type":"object","title":"ListTeamResponse","description":"GET /team 响应"},"ListTeamUserMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"teamUsers":{"items":{"$ref":"#/components/schemas/TeamUser"},"type":"array","title":"Teamusers","description":"团队成员列表","default":[]}},"type":"object","title":"ListTeamUserMsg","description":"GET /team/usr 
数据结构"},"ListTeamUserRequest":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"userSub":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Usersub","description":"用户ID"},"userName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username","description":"用户名"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["teamId"],"title":"ListTeamUserRequest"},"ListTeamUserResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTeamUserMsg"}],"description":"团队成员列表数据结构","default":{"total":0,"teamUsers":[]}}},"type":"object","title":"ListTeamUserResponse"},"ListTestCaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/TestingTestCase"}],"description":"测试用例列表数据结构","default":{"aveScore":-1.0,"avePre":-1.0,"aveRec":-1.0,"aveFai":-1.0,"aveRel":-1.0,"aveLcs":-1.0,"aveLeve":-1.0,"aveJac":-1.0,"testCases":[]}}},"type":"object","title":"ListTestCaseResponse","description":"GET /testing/testcase 响应"},"ListTestingMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"datasetTestings":{"items":{"$ref":"#/components/schemas/DatasetTesting"},"type":"array","title":"Datasettestings","description":"数据集测试列表","default":[]}},"type":"object","title":"ListTestingMsg","description":"GET /testing 
数据结构"},"ListTestingRequest":{"properties":{"kbId":{"type":"string","format":"uuid","title":"Kbid","description":"资产id"},"testingId":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Testingid","description":"测试id"},"testingName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Testingname","description":"测试名称"},"llmId":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Llmid","description":"测试使用的大模型id"},"runStatus":{"anyOf":[{"items":{"$ref":"#/components/schemas/TaskStatus"},"type":"array"},{"type":"null"}],"title":"Runstatus","description":"测试运行状态"},"scoresOrder":{"anyOf":[{"$ref":"#/components/schemas/OrderType"},{"type":"null"}],"description":"测试评分","default":"desc"},"authorName":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authorname","description":"测试创建者"},"page":{"type":"integer","title":"Page","description":"页码","default":1},"pageSize":{"type":"integer","title":"Pagesize","description":"每页数量","default":40}},"type":"object","required":["kbId"],"title":"ListTestingRequest"},"ListTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListTestingMsg"}],"description":"测试列表数据结构","default":{"total":0,"datasetTestings":[]}}},"type":"object","title":"ListTestingResponse","description":"GET /testing 响应"},"ListTokenizerResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string"},"type":"array","title":"Result","description":"分词器的列表数据结构","default":[]}},"type":"object","title":"ListTokenizerResponse","description":"GET /other/tokenizer 
响应"},"ListUserMessageMsg":{"properties":{"total":{"type":"integer","title":"Total","description":"总数","default":0},"userMessages":{"items":{"$ref":"#/components/schemas/UserMsg"},"type":"array","title":"Usermessages","description":"用户消息列表","default":[]}},"type":"object","title":"ListUserMessageMsg","description":"GET /usr_msg 数据结构"},"ListUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/ListUserMessageMsg"}],"description":"用户消息列表数据结构","default":{"total":0,"userMessages":[]}}},"type":"object","title":"ListUserMessageResponse"},"OrderType":{"type":"string","enum":["asc","desc"],"title":"OrderType","description":"排序"},"ParseDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"ParseDocumentResponse","description":"POST /doc/parse 响应"},"ParseMethod":{"type":"string","enum":["general","ocr","enhanced","qa"],"title":"ParseMethod","description":"解析方法"},"RunTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"RunTestingResponse","description":"POST /testing/run 响应"},"SearchChunkMsg":{"properties":{"docChunks":{"items":{"$ref":"#/components/schemas/DocChunk"},"type":"array","title":"Docchunks","description":"文档分片列表","default":[]}},"type":"object","title":"SearchChunkMsg","description":"Post /chunk/search 
数据结构"},"SearchChunkRequest":{"properties":{"kbIds":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Kbids","description":"资产id","default":[]},"query":{"type":"string","title":"Query","description":"查询内容","default":""},"topK":{"type":"integer","title":"Topk","description":"返回的结果数量","default":5},"docIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Docids","description":"文档id"},"bannedIds":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Bannedids","description":"禁止的分块id","default":[]},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"检索方法","default":"keyword_and_vector"},"isRelatedSurrounding":{"type":"boolean","title":"Isrelatedsurrounding","description":"是否关联上下文","default":true},"isClassifyByDoc":{"type":"boolean","title":"Isclassifybydoc","description":"是否按文档分类","default":false},"isRerank":{"type":"boolean","title":"Isrerank","description":"是否重新排序","default":false},"isCompress":{"type":"boolean","title":"Iscompress","description":"是否压缩","default":false},"tokensLimit":{"type":"integer","title":"Tokenslimit","description":"token限制","default":8192}},"type":"object","title":"SearchChunkRequest"},"SearchChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"allOf":[{"$ref":"#/components/schemas/SearchChunkMsg"}],"description":"文档分片列表数据结构","default":{"docChunks":[]}}},"type":"object","title":"SearchChunkResponse","description":"POST /chunk/search 
响应"},"SearchMethod":{"type":"string","enum":["keyword","vector","keyword_and_vector","doc2chunk","doc2chunk_bfs","enhanced_by_llm"],"title":"SearchMethod","description":"搜索方法"},"Task":{"properties":{"opId":{"type":"string","format":"uuid","title":"Opid","description":"关联实体ID"},"opName":{"type":"string","title":"Opname","description":"关联实体的名称"},"taskId":{"type":"string","format":"uuid","title":"Taskid","description":"任务ID"},"taskStatus":{"$ref":"#/components/schemas/TaskStatus"},"taskType":{"$ref":"#/components/schemas/TaskType"},"taskCompleted":{"type":"number","title":"Taskcompleted","description":"任务完成度"},"createdTime":{"type":"string","title":"Createdtime","description":"任务创建时间"}},"type":"object","required":["opId","opName","taskId","taskStatus","taskType","taskCompleted","createdTime"],"title":"Task","description":"任务信息"},"TaskStatus":{"type":"string","enum":["pending","running","success","failed","canceled","deleted"],"title":"TaskStatus","description":"任务状态"},"TaskType":{"type":"string","enum":["doc_parse","kb_export","kb_import","dataset_export","dataset_import","dataset_generate","testing_run"],"title":"TaskType","description":"任务类型"},"Team":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","description":"团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","description":"团队描述"},"authorName":{"type":"string","title":"Authorname","description":"团队创建者的用户ID"},"memberCount":{"type":"integer","title":"Membercount","description":"团队成员数量"},"isPublic":{"type":"boolean","title":"Ispublic","description":"是否为公开团队"},"createdTime":{"type":"string","title":"Createdtime","description":"团队创建时间"}},"type":"object","required":["teamId","teamName","description","authorName","memberCount","isPublic","createdTime"],"title":"Team","description":"团队信息"},"TeamKnowledgebase":{"properties":{"teamId":{"type":"string","format":"uuid","title":"T
eamid","description":"团队ID"},"teamName":{"type":"string","title":"Teamname","description":"团队名称"},"kbList":{"items":{"$ref":"#/components/schemas/Knowledgebase"},"type":"array","title":"Kblist","description":"知识库列表","default":[]}},"type":"object","required":["teamId","teamName"],"title":"TeamKnowledgebase","description":"团队知识库信息"},"TeamMsg":{"properties":{"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"authorName":{"type":"string","title":"Authorname","description":"消息发送者的用户名"},"message":{"type":"string","title":"Message","description":"消息内容"}},"type":"object","required":["msgId","authorName","message"],"title":"TeamMsg","description":"团队信息"},"TeamType":{"type":"string","enum":["mycreated","myjoined","all"],"title":"TeamType","description":"团队类型"},"TeamUser":{"properties":{"userId":{"type":"string","format":"uuid","title":"Userid","description":"用户ID"},"userName":{"type":"string","title":"Username","description":"用户名"},"roleName":{"type":"string","title":"Rolename","description":"角色名称"}},"type":"object","required":["userId","userName","roleName"],"title":"TeamUser","description":"团队成员信息"},"TestCase":{"properties":{"testCaseId":{"type":"string","format":"uuid","title":"Testcaseid","description":"测试用例ID"},"question":{"type":"string","title":"Question","description":"问题"},"answer":{"type":"string","title":"Answer","description":"标准答案"},"llm_answer":{"type":"string","title":"Llm Answer","description":"大模型的回答"},"related_chunk":{"type":"string","title":"Related Chunk","description":"检索到的片段"},"doc_name":{"type":"string","title":"Doc 
Name","description":"来源文档"},"score":{"type":"number","title":"Score","description":"综合得分"},"pre":{"type":"number","title":"Pre","description":"精确率"},"rec":{"type":"number","title":"Rec","description":"召回率"},"fai":{"type":"number","title":"Fai","description":"忠实值"},"rel":{"type":"number","title":"Rel","description":"可解释性"},"lcs":{"type":"number","title":"Lcs","description":"最长公共子串得分"},"leve":{"type":"number","title":"Leve","description":"编辑距离得分"},"jac":{"type":"number","title":"Jac","description":"杰卡德相似系数"}},"type":"object","required":["testCaseId","question","answer","llm_answer","related_chunk","doc_name","score","pre","rec","fai","rel","lcs","leve","jac"],"title":"TestCase","description":"测试用例信息"},"Testing":{"properties":{"testingId":{"type":"string","format":"uuid","title":"Testingid","description":"测试ID"},"testingName":{"type":"string","title":"Testingname","description":"测试名称","max":20,"min":1},"description":{"type":"string","title":"Description","description":"测试描述","max":150},"llm":{"anyOf":[{"$ref":"#/components/schemas/LLM"},{"type":"null"}],"description":"测试使用的大模型信息"},"searchMethod":{"$ref":"#/components/schemas/SearchMethod"},"testingTask":{"anyOf":[{"$ref":"#/components/schemas/Task"},{"type":"null"}],"description":"测试任务"},"aveScore":{"type":"number","title":"Avescore","description":"综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"杰卡德相似系数","default":-1},"authorName":{"type":"string","title":"Authorname","description":"测试创建者的用户名"},"status":{"$ref":"#/components/
schemas/TestingStatus"}},"type":"object","required":["testingId","testingName","description","searchMethod","authorName","status"],"title":"Testing","description":"测试信息"},"TestingStatus":{"type":"string","enum":["idle","pending","running","deleted"],"title":"TestingStatus","description":"测试状态"},"TestingTestCase":{"properties":{"aveScore":{"type":"number","title":"Avescore","description":"平均综合得分","default":-1},"avePre":{"type":"number","title":"Avepre","description":"平均精确率","default":-1},"aveRec":{"type":"number","title":"Averec","description":"平均召回率","default":-1},"aveFai":{"type":"number","title":"Avefai","description":"平均忠实值","default":-1},"aveRel":{"type":"number","title":"Averel","description":"平均可解释性","default":-1},"aveLcs":{"type":"number","title":"Avelcs","description":"平均最长公共子串得分","default":-1},"aveLeve":{"type":"number","title":"Aveleve","description":"平均编辑距离得分","default":-1},"aveJac":{"type":"number","title":"Avejac","description":"平均杰卡德相似系数","default":-1},"testCases":{"items":{"$ref":"#/components/schemas/TestCase"},"type":"array","title":"Testcases","description":"测试用例列表","default":[]}},"type":"object","title":"TestingTestCase","description":"GET /testing/testcase 数据结构"},"Tokenizer":{"type":"string","enum":["中文","en","mix"],"title":"Tokenizer","description":"分词器"},"TypeAction":{"properties":{"actionType":{"$ref":"#/components/schemas/ActionType"},"actions":{"items":{"$ref":"#/components/schemas/action"},"type":"array","title":"Actions","description":"操作列表","default":[]}},"type":"object","required":["actionType"],"title":"TypeAction","description":"不同类别的类别操作"},"UpdateChunkEnabledResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"分片ID列表","default":[]}},"type":"object","title":"UpdateChunkEnabledResponse","description":"PUT /chunk 
响应"},"UpdateChunkRequest":{"properties":{"text":{"type":"string","title":"Text","description":"分块文本内容","default":"这是一个默认的分块文本"},"enabled":{"type":"boolean","title":"Enabled","description":"分块是否启用","default":true}},"type":"object","title":"UpdateChunkRequest"},"UpdateChunkResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"分片ID"}},"type":"object","title":"UpdateChunkResponse","description":"PUT /chunk 响应"},"UpdateDataRequest":{"properties":{"question":{"type":"string","maxLength":200,"minLength":1,"title":"Question","description":"问题","default":"这是一个默认的问题"},"answer":{"type":"string","maxLength":1000,"minLength":1,"title":"Answer","description":"答案","default":"这是一个默认的答案"}},"type":"object","title":"UpdateDataRequest"},"UpdateDataResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据ID"}},"type":"object","title":"UpdateDataResponse","description":"PUT /dataset/data 
响应"},"UpdateDatasetRequest":{"properties":{"datasetName":{"type":"string","maxLength":30,"minLength":1,"title":"Datasetname","description":"测试数据集名称","default":"这是一个默认的数据集名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试数据集简介","default":""}},"type":"object","title":"UpdateDatasetRequest"},"UpdateDatasetResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"数据集ID"}},"type":"object","title":"UpdateDatasetResponse","description":"PUT /dataset 响应"},"UpdateDocumentRequest":{"properties":{"docName":{"type":"string","maxLength":150,"minLength":1,"title":"Docname","default":"这是一个默认的文档名称"},"docTypeId":{"type":"string","format":"uuid","title":"Doctypeid","description":"文档类型的id","default":"00000000-0000-0000-0000-000000000000"},"parseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"chunkSize":{"type":"integer","title":"Chunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"enabled":{"type":"boolean","title":"Enabled","description":"文档是否启用","default":true}},"type":"object","title":"UpdateDocumentRequest"},"UpdateDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"type":"string","format":"uuid","title":"Result","description":"文档ID"}},"type":"object","title":"UpdateDocumentResponse","description":"PUT /doc 
响应"},"UpdateKnowledgeBaseRequest":{"properties":{"kbName":{"type":"string","maxLength":30,"minLength":1,"title":"Kbname","default":"这是一个默认的资产名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"tokenizer":{"allOf":[{"$ref":"#/components/schemas/Tokenizer"}],"default":"中文"},"defaultChunkSize":{"type":"integer","title":"Defaultchunksize","description":"知识库默认文件分块大小","default":512,"max":2048,"min":128},"defaultParseMethod":{"allOf":[{"$ref":"#/components/schemas/ParseMethod"}],"description":"知识库默认解析方法","default":"general"},"uploadCountLimit":{"type":"integer","title":"Uploadcountlimit","description":"知识库上传文件数量限制","default":128,"max":1024,"min":128},"uploadSizeLimit":{"type":"integer","title":"Uploadsizelimit","description":"知识库上传文件大小限制","default":512,"max":2048,"min":128},"docTypes":{"items":{"$ref":"#/components/schemas/DocumentType-Input"},"type":"array","title":"Doctypes","description":"知识库支持的文档类型","default":[]}},"type":"object","title":"UpdateKnowledgeBaseRequest"},"UpdateKnowledgeBaseResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"知识库ID"}},"type":"object","title":"UpdateKnowledgeBaseResponse","description":"PUT /kb 
响应"},"UpdateRoleRequest":{"properties":{"roleName":{"type":"string","maxLength":30,"minLength":1,"title":"Rolename","default":"这是一个默认的角色名称"},"actions":{"items":{"type":"string"},"type":"array","title":"Actions","description":"角色拥有的操作的列表","default":[]}},"type":"object","title":"UpdateRoleRequest"},"UpdateRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"角色ID"}},"type":"object","title":"UpdateRoleResponse","description":"PUT /role 响应"},"UpdateTeamAuthorResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamAuthorResponse","description":"PUT /team/author 响应"},"UpdateTeamRequest":{"properties":{"teamName":{"type":"string","maxLength":30,"minLength":1,"title":"Teamname","default":"这是一个默认的团队名称"},"description":{"type":"string","maxLength":150,"title":"Description","default":""},"is_public":{"type":"boolean","title":"Is Public","default":false}},"type":"object","title":"UpdateTeamRequest"},"UpdateTeamResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队ID"}},"type":"object","title":"UpdateTeamResponse","description":"PUT /team 
响应"},"UpdateTeamUserRoleResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"团队成员ID"}},"type":"object","title":"UpdateTeamUserRoleResponse","description":"PUT /team/usr 响应"},"UpdateTestingRequest":{"properties":{"testingName":{"type":"string","maxLength":150,"minLength":1,"title":"Testingname","description":"测试名称","default":"这是一个默认的测试名称"},"description":{"type":"string","maxLength":200,"title":"Description","description":"测试简介","default":""},"llmId":{"type":"string","title":"Llmid","description":"测试使用的大模型id"},"searchMethod":{"allOf":[{"$ref":"#/components/schemas/SearchMethod"}],"description":"测试使用的检索方法","default":"keyword_and_vector"},"topK":{"type":"integer","title":"Topk","description":"测试中检索方法关联的片段数量","default":5}},"type":"object","required":["llmId"],"title":"UpdateTestingRequest"},"UpdateTestingResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"测试ID"}},"type":"object","title":"UpdateTestingResponse","description":"PUT /testing 响应"},"UpdateUserMessageResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Result","description":"消息ID"}},"type":"object","title":"UpdateUserMessageResponse","description":"PUT /usr_msg 
响应"},"UploadDocumentResponse":{"properties":{"code":{"type":"integer","title":"Code","description":"返回码","default":200},"message":{"type":"string","title":"Message","description":"返回信息","default":""},"result":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Result","description":"文档ID列表","default":[]}},"type":"object","title":"UploadDocumentResponse","description":"POST /doc 响应"},"UserMessageStatus":{"type":"string","enum":["unread","accepted","rejected"],"title":"UserMessageStatus","description":"用户消息状态"},"UserMessageType":{"type":"string","enum":["invitation","application"],"title":"UserMessageType","description":"用户消息类型"},"UserMsg":{"properties":{"teamId":{"type":"string","format":"uuid","title":"Teamid","description":"团队ID"},"msgId":{"type":"string","format":"uuid","title":"Msgid","description":"消息ID"},"senderId":{"type":"string","format":"uuid","title":"Senderid","description":"发送者ID"},"senderName":{"type":"string","title":"Sendername","description":"发送者名称"},"receiverId":{"type":"string","format":"uuid","title":"Receiverid","description":"接收者ID"},"receiverName":{"type":"string","title":"Receivername","description":"接收者名称"},"msgType":{"allOf":[{"$ref":"#/components/schemas/UserMessageType"}],"description":"消息类型"},"msgStatus":{"allOf":[{"$ref":"#/components/schemas/UserMessageStatus"}],"description":"消息状态"},"createdTime":{"type":"string","title":"Createdtime","description":"创建时间"}},"type":"object","required":["teamId","msgId","senderId","senderName","receiverId","receiverName","msgType","msgStatus","createdTime"],"title":"UserMsg","description":"用户消息"},"UserStatus":{"type":"string","enum":["active","inactive","deleted"],"title":"UserStatus","description":"用户状态"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"action":{"properties":{"actionName":{"type":"string","title":"Actionname","description":"操作名称","max":20,"min":1},"action":{"type":"string","title":"Action","description":"操作","max":20,"min":1},"isUsed":{"type":"boolean","title":"Isused","description":"是否启用"}},"type":"object","required":["actionName","action","isUsed"],"title":"action","description":"操作信息"},"role":{"properties":{"roleId":{"type":"string","format":"uuid","title":"Roleid","description":"角色ID"},"roleName":{"type":"string","title":"Rolename","description":"角色名称","max":20,"min":1},"typeActions":{"items":{"$ref":"#/components/schemas/TypeAction"},"type":"array","title":"Typeactions","description":"操作类型列表","default":[]}},"type":"object","required":["roleId","roleName"],"title":"role","description":"角色信息"}}}} \ No newline at end of file diff --git a/witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx b/witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d9c74775ef4624786f63a1c67490fc0bf9100cf7 GIT binary patch literal 5911 zcmZ`-1yodP*B(kGgrRd#Qk0={2r21SkZwj8x`d%aN)Q}EIwTd5(s4-X5DNApNJmR?cqF-wyZ0qcvK1i6YGKmZrrvzlG+OQUmWH5GlCxRBY2!B`0~U@24gQ zlS;@#ZqCx2C8M+1M8s4;WcB$#zPL6&fvR4cqJ?l!WG?Qv(;$r{H?`x6YQoO@blETx z?bW;qmZ6NYCZ_v&N=>-nd0A^mE94M9Wtc}h{nyzAo3&>t{XrnHUx_&nIL$W&`Le=G zgE`GZNZFGc2yLn6jW5W>y@E0mze4 z?OL?}qbVteI|fGkh7`sWZ^rh%)KNbE;jdOIbza)cO;h{%YV4SKOn7IVdxGcR?4%lJ zumEoW02%iH0CEgFa0ea_Tc{)S&&cVY_fJ3Q6t`^PEJc zhHh%y2_w~yaqzZ|3k_^g6b5!D-QJ8>uSu7{1+Sj<$}29fhZ0a`@;JITpfeuPBdQiW4=j~LGy(ebd1tRW|- zo`%94a%dE-&HYWmw$gn%+J??%cackX1db`Ybz-1-Smb&#?A4LTy%UGF^NHz)Q1q%8 zh(FkgmtOyFV6XGLMGeEqIi2R#Mmrm+r}iEF14=<5o&Jei2s3 z9ky;}zv`Os5Ok!QUD$<-qPhp7T^Mj@wQIAQGk-ZWU)8~nFAgNmAMjvp{ajG*s-(W1MR_AU58F@&k@N7=mfA@^V$RgsNlQ_AX7=w+hANuexu7Hgny?sKkt|4J_=V?;8 
z2^zha^~#NuWiDk5?@iX&iq5`XY_BXDYMN-i(%D4H+rEX=0r`TyRVCUoW zCFO)4Bilcc&XW6A*5CRz6BBtQYs zhnBR$bZ%d$f>EwTq2`8U?fr0#0VL_U!AGHlhvwfldg7kHj@#+!;@zU_@hkTx)e zi>jn;=DOg5F8RO-M3UO;v`RyKv99_jM4F3_LmDB0-2Ui-?do5WIRlj467G*dvKu!pY#(OrZk~>_hL)unaIsJ!RD(au zuvVQ%ckMiA#b2>DV#Dv&8H9>Nk!8u!JFPxBPmyp`0k>?*i9sEBM}cmdA$V*ogJEZF0vHxK1r6VqLZ!1ajePw zAtsuY5?4(y_!!~baA~wFD^~9X_&x6)MfEMt8v!v)1NZZ*RUR)5hL=tZU9^)Q-c6$n{fuCBNdb zoNR2i&>z1rSqS~&RCeEs){*%!q#2e%Zd2Sf4XtR>1)+6~_R8z+B$1RF=u3l=Z`nko z8dQX2N>+)~g(|uGv`^}nIh9*=MH}P=P-Mw8bqhZ`Ts59lk$Iv>u=%TNAw@n=KAXb5 z87?Axuu-G|*}S^R=J0ZQMyU_8=V`K6llk6-OQ#~qlK8UOFB7@afWxVUQ>O%RCAAFi zHc~7Pd2E(93^%B#`4_de;t@B1Io5miov$3*S6u)UpN=k;pz6dddrs>5$ROx-2-vPn|FO>RtwZNGGaoiJ>2UZKQDEb@pFWGx=&TJ{KqT(5ylh#8HmfI*ffsmPjawa zen69dR8$aYv1NTtndS*whB(r9H~4P)`N(Ac;nXaV-%zn(fi)+ol=(g?QZ9#@OJX5|@ zlyKgn27$zn%kA6c&8@L>o6!we|6V|(bd9|P@Bn}=Isky?&jRA*>}Kz73x#^P^Za%D zvx*p~I!`J9>(BIn{D>!3o4xfc2GSe3N`w@=^ACfjZDEe2aY_;S>x(m9o;J#HQOuPQ z6kKlyBfGr=_Qxt9fu(CCJrifbXUM64 z75-w?@-jv0*l6bR6#isk5IOqk0jl01>ce-tz(9z1_l&XJ>}#S1%FAln^Iln2FWmzs ziSw$OjPhB;%rTT+a8c><*J!|J{HVT|*UHQJBDA;>4dRtNgeEd~~#sDB9 zOssg6IUFpe6->ZxElRC)H^aVD_pPxiZ7-y9*C6se&&qQgh#~cE|pjv z&LLj$(5b*tFQ7`gOKBX-UhrIhskzLZN=r{wR$kY;d1{d~y!zt4%j@E1l`PZ6bz{fq znF}2=V6rNAh1`zuofnF{knX;Gx7hReP{L%5FzR5ro@09icCT3QaaLHBPc^Y7Z_-ZA z@u8~%gmM=#BGZZnZW(h3i(3Z?sNA|0Ded0Y^9{hyNIAs(EQ)yz$4x^Z;E|f>4YMfa zL&Mnoloq#dAy-uU8Jqi8yJY(TyuP-|?BCzk@V4;{pACZ(D}H^isOb&&j)^xI(S0V& z`!g|zEXnWHT^dELTRpKX9EvC;RfYqkaH%f|P@*Jc>-GMkeBLTuW$9Z!`;7G37;SUA zgE=pMmpJ+7M=W#0)p*3>vb+evZ2xs8-jGHmMWB||FJK0gmTeA?p~f{gET`3|?J9UV zg~C2k3<3tm_Vqn_=@>Bqf50#hAzdVQ=$g+e_2j-1F^*)WqQ0mh+c57w!Fd3?~ID8+AC@1*pM~33>|)NCt{kZ1^`fO<6fs{ zT2Z*@;pmpb+bOayIt`?ER6^;7EJjU05=pn`Dg^Q46+y4X>BZ)LL>6&wEQIGu=q6XB zwa3ti44GoxViAoYPsZLbAMbwi>As${Zc&C>fRZ@!b>LK_HcntkXSH;)7x^5_ z!WeP!rdaU^3MQlBX|4-+BGYi0V9Jkrf(&43a=KTNf=#uE!_DycPdaoSI0quRLllwG zjuSH86V{sP8?XdP$4~Fn50RZ&!sCPNzJI;Ea!gT}Qj)YTjn&qPCC zwW!i)h>o#F5OQr6fGgi*Ft9 zZ_Z&z74^y(JrwMYh~h|q_?(Uc&@BX?85WR_>7#oEIYMWqsxEHqyk-F^nL 
zT@D>K(uc=-)d!E+He`q^ZJM8W*B_@C<6mMii(7G8Y+Hxee|?(oGV70q<oY@uk8`6KTp-D0==Cty2U1+V$gL?;6cGkN17loI=>0OZVY3*@&1DVjBMBmg#{_2StdA0~t zil?LnTf2$g8GY}pM79&Rt+L6>g!jS=1B{;2*5*NL$GiJyK@J!9Vzn}Y+DKbDKo4-D z$n_I@uROBYyt9I|tGV*N#V>-DHO$)(F;|bpTs5$=kCmgDyZ16atUM#{F_#@AMFzf$uFs}yfLDK4=!DL&MNokFzZvL$#)Lsf(imb9fyd))iNt)q>s_c ziL0#+LF^AJwpXr}$V;J+PLhoPz`v9CnRhBSKNbLR7c)_kKgs*MOR{u!w*Q^JG1~nq zEz-dAH!gR~2XgUL2dSeCfm}~d1Js3k25Kj(Dq&yDw%M8VciP^4%`^y4t!_&_uDU12 zZZ;j3l5Uuv`VI!R+Yz4D!jVah8Z+vDLjrT4aiFya>~d#58wVdyy)fWbD7pP^ln|dH zgo0ut2-FO@+?z_v;GJg#b$L0RR^lYUiB@*8p-%h>t!cgJR(Q_c5l2GyWj6uZNKm=? z?#*dWcBzerhmm6!wYn_Tsn7@dOp2>KKK{jwnr}N=MhGNx$!FhC!Y%FSi6S%v3AL90@^#|Du)~((lw1dU^)RN#GQ}0Ob%MzbwWm$ z%$+=RWW%t!^eA6O*^9S@Cpi5Q8=S1|{EF%mJ9;~21ZOge#p*yCTwbG>l`%cH1VigF zhGI+~`dh{L6H5Oi`LD405+_Ogf|n?)jBE)Yq*Ww`r)Hwh$qmHYXCWhNDG6EGT2k(Ne%h!9 zNiK$W=R5B@!k+qf~XjjD=w{~AXgFh(zL+p%}R;1m|>&O#C#X6+$0^I*!!;wNpx1&MHM)AZPxoe z_|)06cweDw$IqLd5q#@XCm@AX;0AVNvUE=ATC*rw7nZ@9J{@eF$!Hk8sE_MexFCXw zECyTZ|3n}Awo_lx_wJH5Z#76&)QxD+1^$ellQIp&Fo}gtf%E^1n3$OP>j=W|`+p_P z>*(u};vXyk5QJ6!PxSu^jMw4UHMhUvy_nzs7X|LRf$Mv&{}>1+x%HQU|GERaZsmGI z@gFN#a7=%JapUjy;yUzt-ToV@hLME-QN^zVuh+r9fkGH<7W2=4ChT?a^(_AzoJjT; p`2WfI>vpau(%*KxF%sy1Nh>W?Jd8vN0N`WJhnNc9@;i_K{{!_th>8FJ literal 0 HcmV?d00001 diff --git a/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml b/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml new file mode 100644 index 0000000..a9db39b --- /dev/null +++ b/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml @@ -0,0 +1,13 @@ +- answer: 《图书出版的艺术和科学》的作者是小赫伯特·S·贝利(Herbert·S·Bailey,Jr.)。 + chunk: "【 文献号 】1-2432\n【原文出处】出版发行研究\n【原刊地名】京\n【原刊期号】199601\n【原刊页号】40-41\n【分 类 号】Z1\n\ + 【分 类 名】出版工作、图书评介\n【 作 者 】王益\n【复印期号】199604\n【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》\n\ + 【 正 文 】\n 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo\n\ + f Book 
Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林\n\ + 斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199\n0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的\n\ + 基本原则仍是正确的,因而出版时基本上没有修改。【 文献号 】1-2432\n【原文出处】出版发行研究\n【原刊地名】京\n【原刊期号】199601\n【原刊页号】40-41\n\ + 【分 类 号】Z1\n【分 类 名】出版工作、图书评介\n【 作 者 】王益\n【复印期号】199604\n【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》\n\ + 【 正 文 】\n 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo\n\ + f Book Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林\n\ + 斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199\n0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的\n\ + 基本原则仍是正确的,因而出版时基本上没有修改。有关出版工作的方方面面,它都涉及到了。" + question: 《图书出版的艺术和科学》的作者是谁? -- Gitee From ae91d780eafb5e5014de28973c058c520196b0fa Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 17:51:53 +0800 Subject: [PATCH 11/15] =?UTF-8?q?=E5=AE=8C=E5=96=84=E5=87=86=E7=A1=AE?= =?UTF-8?q?=E7=8E=87=E6=B5=8B=E8=AF=95=E7=9B=B8=E5=85=B3=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/app.py | 8 ++ .../base/task/worker/acc_testing_worker.py | 82 ++++++++---------- .../task/worker/generate_dataset_worker.py | 7 +- .../base/task/worker/import_dataset_worker.py | 1 + .../base/task/worker/parse_document_worker.py | 27 ++++-- data_chain/apps/router/acc_testing.py | 5 +- data_chain/apps/router/dataset.py | 1 - data_chain/apps/router/document.py | 6 +- .../apps/service/acc_testing_service.py | 33 ++++--- data_chain/apps/service/document_service.py | 3 +- data_chain/manager/chunk_manager.py | 34 +++++++- data_chain/manager/document_manager.py | 15 ++++ data_chain/manager/testing_manager.py | 10 +-- data_chain/parser/handler/pdf_parser.py | 1 - data_chain/rag/doc2chunk_bfs_searcher.py | 4 +- data_chain/rag/doc2chunk_searcher.py | 2 +- 
data_chain/stores/database/database.py | 6 +- .../188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx | Bin 5911 -> 0 bytes .../188455f8-fce3-4f87-8f6f-59e266af7d91.yaml | 13 --- 19 files changed, 154 insertions(+), 104 deletions(-) delete mode 100644 witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx delete mode 100644 witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml diff --git a/data_chain/apps/app.py b/data_chain/apps/app.py index 9aa6cd9..4af925b 100644 --- a/data_chain/apps/app.py +++ b/data_chain/apps/app.py @@ -44,6 +44,14 @@ from data_chain.parser.handler import ( xlsx_parser, yaml_parser, ) +from data_chain.rag import ( + base_searcher, + keyword_searcher, + vector_searcher, + keyword_and_vector_searcher, + doc2chunk_searcher, + doc2chunk_bfs_searcher +) from data_chain.stores.database.database import DataBase, ActionEntity, DocumentTypeEntity from data_chain.manager.role_manager import RoleManager from data_chain.manager.document_type_manager import DocumentTypeManager diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py b/data_chain/apps/base/task/worker/acc_testing_worker.py index c7d7d2c..6be16f0 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -20,6 +20,7 @@ from data_chain.parser.handler.json_parser import JsonParser from data_chain.parser.handler.yaml_parser import YamlParser from data_chain.parser.handler.xlsx_parser import XlsxParser from data_chain.manager.task_manager import TaskManager +from data_chain.manager.knowledge_manager import KnowledgeBaseManager from data_chain.manager.document_manager import DocumentManager from data_chain.manager.chunk_manager import ChunkManager from data_chain.manager.dataset_manager import DatasetManager @@ -30,6 +31,7 @@ from data_chain.manager.task_queue_mamanger import TaskQueueManager from 
data_chain.stores.database.database import TaskEntity, QAEntity, DataSetEntity, DataSetDocEntity, TestingEntity, TestCaseEntity from data_chain.stores.minio.minio import MinIO from data_chain.stores.mongodb.mongodb import Task +from data_chain.config.config import config class TestingWorker(BaseWorker): @@ -83,16 +85,6 @@ class TestingWorker(BaseWorker): tmp_path = os.path.join(TESTING_REPORT_PATH_IN_OS, str(task_entity.id)) if os.path.exists(tmp_path): shutil.rmtree(tmp_path) - await TestingManager.update_testing_by_testing_id(task_entity.op_id, { - "ave_score": -1, - "ave_pre": -1, - "ave_rec": -1, - "ave_fai": -1, - "ave_rel": -1, - "ave_lcs": -1, - "ave_leve": -1, - "ave_jac": -1, - }) await TestCaseManager.update_test_case_by_testing_id(task_entity.op_id, {"status": TestCaseStatus.DELETED.value}) await MinIO.delete_object( TESTING_REPORT_PATH_IN_MINIO, @@ -141,7 +133,7 @@ class TestingWorker(BaseWorker): @staticmethod async def testing(testing_entity: TestingEntity, qa_entities: list[QAEntity], llm: LLM) -> list[TestCaseEntity]: '''测试数据集''' - test_case_entities = [] + testcase_entities = [] with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f: prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) prompt_template = prompt_dict.get('INTENT_DETECT_PROMPT_TEMPLATE', '') @@ -165,6 +157,7 @@ class TestingWorker(BaseWorker): bac_info += sub_bac_info+'\n' bac_info = TokenTool.get_k_tokens_words_from_content(bac_info, llm.max_tokens) prompt = prompt_template.format( + history='', question=question, bac_info=bac_info ) @@ -209,7 +202,7 @@ class TestingWorker(BaseWorker): if sub_socres: score = sum(sub_socres) / len(sub_socres) test_case_entity = TestCaseEntity( - qa_id=qa_entity.id, + testing_id=testing_entity.id, question=question, answer=answer, chunk=chunk, @@ -225,46 +218,46 @@ class TestingWorker(BaseWorker): leve=leve, jac=jac ) - test_case_entities.append(test_case_entity) + testcase_entities.append(test_case_entity) index = 0 - while index < 
len(test_case_entities): - await TestCaseManager.add_test_cases(test_case_entities[index:index+1024]) + while index < len(testcase_entities): + await TestCaseManager.add_test_cases(testcase_entities[index:index+1024]) index += 1024 - return test_case_entities + return testcase_entities @staticmethod - async def update_testing_score(testing_id: uuid.UUID, test_case_entities: list[TestCaseEntity]) -> None: + async def update_testing_score(testing_id: uuid.UUID, testcase_entities: list[TestCaseEntity]) -> TestingEntity: '''更新测试分数''' score_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.score != -1: score_list.append(test_case_entity.score) pre_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.pre != -1: pre_list.append(test_case_entity.pre) rec_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.rec != -1: rec_list.append(test_case_entity.rec) fai_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.fai != -1: fai_list.append(test_case_entity.fai) rel_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.rel != -1: rel_list.append(test_case_entity.rel) lcs_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.lcs != -1: lcs_list.append(test_case_entity.lcs) leve_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.leve != -1: leve_list.append(test_case_entity.leve) jac_list = [] - for test_case_entity in test_case_entities: + for test_case_entity in testcase_entities: if test_case_entity.jac != -1: jac_list.append(test_case_entity.jac) ave_score = -1 @@ -291,7 +284,7 @@ class 
TestingWorker(BaseWorker): ave_jac = -1 if jac_list: ave_jac = sum(jac_list) / len(jac_list) - await TestingManager.update_testing_by_testing_id(testing_id, { + testing_entity = await TestingManager.update_testing_by_testing_id(testing_id, { "ave_score": ave_score, "ave_pre": ave_pre, "ave_rec": ave_rec, @@ -301,6 +294,7 @@ class TestingWorker(BaseWorker): "ave_leve": ave_leve, "ave_jac": ave_jac }) + return testing_entity @staticmethod async def generate_report_and_upload_to_minio( @@ -308,26 +302,25 @@ class TestingWorker(BaseWorker): tmp_path: str): '''生成报告并上传到minio''' xlsx_path = os.path.join(tmp_path, "report.xlsx") - testing_doc_entities = await DatasetManager.list_dataset_document_by_dataset_id(testing_entity.dataset_id) - doc_ids = [] - for testing_doc_entity in testing_doc_entities: - doc_ids.append(testing_doc_entity.doc_id) - chunk_cnt = await ChunkManager.get_chunk_cnt_by_doc_ids(doc_ids) - chunk_tokens = await ChunkManager.get_chunk_tokens_by_doc_ids(doc_ids) + kb_entity = await KnowledgeBaseManager.get_knowledge_base_by_kb_id(testing_entity.kb_id) + doc_cnt = DocumentManager.get_doc_cnt_by_kb_id(testing_entity.kb_id) + chunk_cnt = await ChunkManager.get_chunk_cnt_by_kb_id(testing_entity.kb_id) + chunk_tokens = await ChunkManager.get_chunk_tokens_by_kb_id(testing_entity.kb_id) ave_chunk_tokens = 0 if chunk_cnt != 0: ave_chunk_tokens = chunk_tokens / chunk_cnt - config = { - 'dataset_name(数据集名称)': dataset_entity.name, - 'doc_cnt(文档数量)': len(testing_doc_entities), - 'chunk_cnt(分片数量)': chunk_cnt, - 'chunk_tokens(分片平均token数)': ave_chunk_tokens, + test_config = { + 'kb_name(知识库名称)': [kb_entity.name], + 'dataset_name(数据集名称)': [dataset_entity.name], + 'doc_cnt(文档数量)': [doc_cnt], + 'chunk_cnt(分片数量)': [chunk_cnt], + 'chunk_tokens(分片平均token数)': [ave_chunk_tokens], 'llm(大模型)': [config['MODEL_NAME']], 'embedding_model(向量检索)': [config['EMBEDDING_MODEL_NAME']], } - model_config_df = pd.DataFrame(config) + model_config_df = pd.DataFrame(test_config) ave_result = { - 
'ave_score(平均综合得分)': [testing_entity.score], + 'ave_score(平均综合得分)': [testing_entity.ave_score], 'ave_pre(平均准确率)': [testing_entity.ave_pre], 'ave_rec(平均召回率)': [testing_entity.ave_rec], 'ave_fai(平均可信度)': [testing_entity.ave_fai], @@ -374,8 +367,9 @@ class TestingWorker(BaseWorker): ave_result_df.to_excel(writer, sheet_name='ave_result(平均结果)', index=False) test_case_df.to_excel(writer, sheet_name='test_case(测试结果)', index=False) await MinIO.put_object( - bucket_name=TESTING_REPORT_PATH_IN_MINIO, - object_name=str(testing_entity.id), + TESTING_REPORT_PATH_IN_MINIO, + str(testing_entity.id), + xlsx_path ) @staticmethod @@ -404,14 +398,14 @@ class TestingWorker(BaseWorker): current_stage += 1 await TestingWorker.report(task_id, "初始化路径", current_stage, stage_cnt) qa_entities = await QAManager.list_all_qa_by_dataset_id(testing_entity.dataset_id) - await TestingWorker.testing(testing_entity, qa_entities, llm) + testcase_entities = await TestingWorker.testing(testing_entity, qa_entities, llm) current_stage += 1 await TestingWorker.report(task_id, "测试完成", current_stage, stage_cnt) - await TestingWorker.update_testing_score(testing_entity.id, qa_entities) + testing_entity = await TestingWorker.update_testing_score(testing_entity.id, testcase_entities) current_stage += 1 await TestingWorker.report(task_id, "更新测试分数", current_stage, stage_cnt) dataset_entity = await DatasetManager.get_dataset_by_dataset_id(testing_entity.dataset_id) - await TestingWorker.generate_report_and_upload_to_minio(dataset_entity, testing_entity, qa_entities, tmp_path) + await TestingWorker.generate_report_and_upload_to_minio(dataset_entity, testing_entity, testcase_entities, tmp_path) current_stage += 1 await TestingWorker.report(task_id, "生成报告并上传到minio", current_stage, stage_cnt) await TaskQueueManager.add_task(Task(_id=task_id, status=TaskStatus.SUCCESS.value)) diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index 
603c62d..d75f5cc 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -15,6 +15,7 @@ from data_chain.entities.enum import TaskType, TaskStatus, KnowledgeBaseStatus, from data_chain.entities.common import DEFAULt_DOC_TYPE_ID from data_chain.parser.tools.token_tool import TokenTool from data_chain.manager.task_manager import TaskManager +from data_chain.manager.document_manager import DocumentManager from data_chain.manager.chunk_manager import ChunkManager from data_chain.manager.dataset_manager import DatasetManager from data_chain.manager.qa_manager import QAManager @@ -97,6 +98,7 @@ class GenerateDataSetWorker(BaseWorker): dataset_doc_entities = await DatasetManager.list_dataset_document_by_dataset_id(dataset_entity.id) doc_chunks = [] for dataset_doc_entity in dataset_doc_entities: + doc_entity = await DocumentManager.get_document_by_doc_id(dataset_doc_entity.doc_id) chunk_entities = await ChunkManager.list_all_chunk_by_doc_id(dataset_doc_entity.doc_id) chunks = [] for chunk_entity in chunk_entities: @@ -105,8 +107,8 @@ class GenerateDataSetWorker(BaseWorker): type=chunk_entity.type )) doc_chunk = DocChunk( - doc_id=dataset_entity.id, - doc_name=dataset_entity.name, + doc_id=doc_entity.id, + doc_name=doc_entity.name, chunks=[] ) doc_chunk.chunks = chunks @@ -219,6 +221,7 @@ class GenerateDataSetWorker(BaseWorker): usr_call = '请输出分数' score = await llm.nostream([], sys_call, usr_call) score = eval(score) + score = max(0, min(100, score)) else: score = 100 if score > 60: diff --git a/data_chain/apps/base/task/worker/import_dataset_worker.py b/data_chain/apps/base/task/worker/import_dataset_worker.py index ee6200c..1442c64 100644 --- a/data_chain/apps/base/task/worker/import_dataset_worker.py +++ b/data_chain/apps/base/task/worker/import_dataset_worker.py @@ -205,6 +205,7 @@ class ImportDataSetWorker(BaseWorker): usr_call = '请输出分数' score = await llm.nostream([], sys_call, 
usr_call) score = eval(score) + score = min(max(score, 0), 100) databse_score += score if len(qa_entities) > 0: databse_score /= len(qa_entities) diff --git a/data_chain/apps/base/task/worker/parse_document_worker.py b/data_chain/apps/base/task/worker/parse_document_worker.py index 07cfd2b..e6cad5b 100644 --- a/data_chain/apps/base/task/worker/parse_document_worker.py +++ b/data_chain/apps/base/task/worker/parse_document_worker.py @@ -67,7 +67,7 @@ class ParseDocumentWorker(BaseWorker): logging.exception(err) return False doc_id = task_entity.op_id - await DocumentManager.update_document_by_doc_id(doc_id, {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None}) + await DocumentManager.update_document_by_doc_id(doc_id, {"abstract": "", "abstract_vector": None}) await ImageManager.update_images_by_doc_id(doc_id, {"status": ImageStatus.DELETED.value}) await ChunkManager.update_chunk_by_doc_id(doc_id, {"status": ChunkStatus.DELETED.value}) tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id)) @@ -282,7 +282,6 @@ class ParseDocumentWorker(BaseWorker): '''合并和拆分内容''' if doc_entity.parse_method == ParseMethod.QA or doc_entity.parse_relut_topology == DocParseRelutTopology.TREE: return - nodes = [] for node in parse_result.nodes: if node.type == ChunkType.TEXT: @@ -319,7 +318,7 @@ class ParseDocumentWorker(BaseWorker): tmp = sentence else: if TokenTool.get_tokens(tmp) + TokenTool.get_tokens(sentence) > doc_entity.chunk_size: - node = ParseNode( + tmp_node = ParseNode( id=uuid.uuid4(), lv=node.lv, parse_topology_type=ChunkParseTopology.GERNERAL, @@ -328,14 +327,26 @@ class ParseDocumentWorker(BaseWorker): type=ChunkType.TEXT, link_nodes=[] ) - nodes.append(node) + nodes.append(tmp_node) + tmp = '' else: tmp += sentence else: + if len(tmp) > 0: + tmp_node = ParseNode( + id=uuid.uuid4(), + lv=node.lv, + parse_topology_type=ChunkParseTopology.GERNERAL, + text_feature=tmp, + content=tmp, + type=ChunkType.TEXT, + link_nodes=[] + ) + nodes.append(tmp_node) 
tmp = '' nodes.append(node) if len(tmp) > 0: - node = ParseNode( + tmp_node = ParseNode( id=uuid.uuid4(), lv=node.lv, parse_topology_type=ChunkParseTopology.GERNERAL, @@ -344,7 +355,7 @@ class ParseDocumentWorker(BaseWorker): type=ChunkType.TEXT, link_nodes=[] ) - nodes.append(node) + nodes.append(tmp_node) parse_result.nodes = nodes @staticmethod @@ -556,7 +567,9 @@ class ParseDocumentWorker(BaseWorker): return None await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.IDLE.value}) if task_entity.status == TaskStatus.PENDING.value or task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: - await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"status": DocumentStatus.PENDING.value, "abstract": "", "abstract_vector": None}) + if task_entity.status == TaskStatus.RUNNING.value or task_entity.status == TaskStatus.FAILED.value: + await TaskManager.update_task_by_id(task_id, {"status": TaskStatus.CANCLED.value}) + await DocumentManager.update_document_by_doc_id(task_entity.op_id, {"abstract": "", "abstract_vector": None}) await ImageManager.update_images_by_doc_id(task_entity.op_id, {"status": ImageStatus.DELETED.value}) await ChunkManager.update_chunk_by_doc_id(task_entity.op_id, {"status": ChunkStatus.DELETED.value}) tmp_path = os.path.join(DOC_PATH_IN_OS, str(task_id)) diff --git a/data_chain/apps/router/acc_testing.py b/data_chain/apps/router/acc_testing.py index 46c68f2..3c0da82 100644 --- a/data_chain/apps/router/acc_testing.py +++ b/data_chain/apps/router/acc_testing.py @@ -33,7 +33,6 @@ async def list_testing_by_kb_id( action: Annotated[str, Depends(get_route_info)], req: Annotated[ListTestingRequest, Body()], ): - print(req.kb_id) if not (await KnowledgeBaseService.validate_user_action_to_knowledge_base(user_sub, req.kb_id, action)): raise Exception("用户没有权限访问该知识库的测试") list_testing_msg = await TestingService.list_testing_by_kb_id(req) @@ -60,7 +59,7 @@ async def 
download_testing_report_by_testing_id( if not (await TestingService.validate_user_action_to_testing(user_sub, testing_id, action)): raise Exception("用户没有权限访问该测试的测试报告") report_link_url = await TestingService.generate_testing_report_download_url(testing_id) - document_name, extension = str(testing_id)+".zip", "zip" + document_name, extension = str(testing_id)+".xlsx", "xlsx" async with AsyncClient() as async_client: response = await async_client.get(report_link_url) if response.status_code == 200: @@ -86,7 +85,7 @@ async def create_testing( req: Annotated[CreateTestingRequest, Body()]): if not (await DataSetService.validate_user_action_to_dataset(user_sub, req.dataset_id, action)): raise Exception("用户没有权限访问该数据集的测试") - task_id = await TestingService.create_testing(req) + task_id = await TestingService.create_testing(user_sub, req) return CreateTestingResponsing(result=task_id) diff --git a/data_chain/apps/router/dataset.py b/data_chain/apps/router/dataset.py index dc5c3ea..bda6270 100644 --- a/data_chain/apps/router/dataset.py +++ b/data_chain/apps/router/dataset.py @@ -54,7 +54,6 @@ async def list_data_in_dataset( if not (await DataSetService.validate_user_action_to_dataset(user_sub, req.dataset_id, action)): raise Exception("用户没有权限访问该数据集的数据") list_data_in_dataset_msg = await DataSetService.list_data_in_dataset(req) - print(list_data_in_dataset_msg) return ListDataInDatasetResponse(result=list_data_in_dataset_msg) diff --git a/data_chain/apps/router/document.py b/data_chain/apps/router/document.py index 541eb2b..362f580 100644 --- a/data_chain/apps/router/document.py +++ b/data_chain/apps/router/document.py @@ -93,13 +93,13 @@ async def upload_docs( @router.post('/parse', response_model=ParseDocumentResponse, dependencies=[Depends(verify_user)]) async def parse_docuement_by_doc_ids( user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], doc_ids: Annotated[list[UUID], Body(alias="docIds")], parse: Annotated[bool, Query()]): 
for doc_id in doc_ids: - if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, "parse")): + if not (await DocumentService.validate_user_action_to_document(user_sub, doc_id, action)): raise Exception("用户没有权限解析该文档") - if parse: - doc_ids = await DocumentService.parse_docs(doc_ids, parse) + doc_ids = await DocumentService.parse_docs(doc_ids, parse) return ParseDocumentResponse(result=doc_ids) diff --git a/data_chain/apps/service/acc_testing_service.py b/data_chain/apps/service/acc_testing_service.py index 46e30c2..640d254 100644 --- a/data_chain/apps/service/acc_testing_service.py +++ b/data_chain/apps/service/acc_testing_service.py @@ -27,7 +27,7 @@ from data_chain.manager.team_manager import TeamManager from data_chain.manager.role_manager import RoleManager from data_chain.stores.minio.minio import MinIO from data_chain.entities.enum import TestingStatus, TaskType, TaskStatus -from data_chain.entities.common import IMPORT_DATASET_PATH_IN_OS, IMPORT_DATASET_PATH_IN_MINIO +from data_chain.entities.common import TESTING_REPORT_PATH_IN_MINIO from data_chain.stores.database.database import DataSetEntity from data_chain.logger.logger import logger as logging @@ -55,41 +55,41 @@ class TestingService: try: total, dataset_ids = await TestingManager.list_testing_unique_dataset_ids(req) dataset_entities = await DatasetManager.list_datasets_by_dataset_ids(dataset_ids) - dataset_dict = {dataset.dataset_id: dataset for dataset in dataset_entities} + dataset_dict = {dataset_entity.id: dataset_entity for dataset_entity in dataset_entities} dataset_testings = [] - llm = Convertor.convert_llm_config_to_llm() + llm = await Convertor.convert_llm_config_to_llm() testing_ids = [] for dataset_id in dataset_ids: dataset_entity = dataset_dict.get(dataset_id) testing_entities = await TestingManager.list_testing_by_dataset_id(dataset_id) dataset_testing = DatasetTesting( - dataset_id=dataset_entity.id, - dataset_name=dataset_entity.name, + 
datasetId=dataset_entity.id, + datasetName=dataset_entity.name, testings=[] ) for testing_entity in testing_entities: - testing = Convertor.convert_testing_entity_to_testing(testing_entity) + testing = await Convertor.convert_testing_entity_to_testing(testing_entity) testing.llm = llm dataset_testing.testings.append(testing) - testing_ids.append(testing_entity.testing_id) + testing_ids.append(testing_entity.id) dataset_testings.append(dataset_testing) task_entities = await TaskManager.list_current_tasks_by_op_ids(testing_ids) task_dict = {task.op_id: task for task in task_entities} task_report_entities = await TaskReportManager.list_current_task_report_by_task_ids( - [task.task_id for task in task_entities] + [task.id for task in task_entities] ) task_report_dict = {task_report.task_id: task_report for task_report in task_report_entities} for dataset_testing in dataset_testings: for testing in dataset_testing.testings: - task_entity = task_dict.get(testing.op_id) + task_entity = task_dict.get(testing.testing_id, None) if task_entity: - task_report_entity = task_report_dict.get(task_entity.task_id, None) + task_report_entity = task_report_dict.get(task_entity.id, None) task = await Convertor.convert_task_entity_to_task(task_entity, task_report_entity) - testing.task = task + testing.testing_task = task list_testing_msg = ListTestingMsg( total=total, - dataset_testings=dataset_testings + datasetTestings=dataset_testings ) return list_testing_msg except Exception as e: @@ -127,12 +127,9 @@ class TestingService: async def generate_testing_report_download_url(testing_id: uuid.UUID) -> str: """生成测试报告下载链接""" try: - task_entity = await TaskManager.get_current_task_by_op_id(testing_id) - if not task_entity: - raise Exception("任务不存在") - download_url = MinIO.generate_download_link( - IMPORT_DATASET_PATH_IN_MINIO, - str(task_entity.task_id), + download_url = await MinIO.generate_download_link( + TESTING_REPORT_PATH_IN_MINIO, + str(testing_id), ) return download_url except 
Exception as e: diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 8f9caa3..7ec0cef 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -223,7 +223,8 @@ class DocumentService: else: if doc_entity.status != DocumentStatus.PENDING.value and doc_entity.status != DocumentStatus.RUNNING.value: continue - task_id = await TaskQueueService.stop_task(doc_id) + task_entity = await TaskManager.get_current_task_by_op_id(doc_id) + task_id = await TaskQueueService.stop_task(task_entity.id) if task_id: doc_ids_success.append(doc_id) return doc_ids_success diff --git a/data_chain/manager/chunk_manager.py b/data_chain/manager/chunk_manager.py index 39e1afe..13ef02b 100644 --- a/data_chain/manager/chunk_manager.py +++ b/data_chain/manager/chunk_manager.py @@ -68,6 +68,22 @@ class ChunkManager(): logging.exception("[ChunkManager] %s", err) raise e + async def get_chunk_cnt_by_kb_id(kb_id) -> int: + """根据文档ID查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(func.count()) + .where(ChunkEntity.kb_id == kb_id) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) + ) + result = await session.execute(stmt) + return result.scalar() + except Exception as e: + err = "根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e + @staticmethod async def get_chunk_tokens_by_doc_ids(doc_ids: List[uuid.UUID]) -> int: """根据文档ID查询文档解析结果""" @@ -85,6 +101,23 @@ class ChunkManager(): logging.exception("[ChunkManager] %s", err) raise e + @staticmethod + async def get_chunk_tokens_by_kb_id(kb_id) -> int: + """根据文档ID查询文档解析结果""" + try: + async with await DataBase.get_session() as session: + stmt = ( + select(func.sum(ChunkEntity.tokens)) + .where(ChunkEntity.kb_id == kb_id) + .where(ChunkEntity.status != ChunkStatus.DELETED.value) + ) + result = await session.execute(stmt) + return result.scalar() + except Exception as e: + err = 
"根据文档ID查询文档解析结果失败" + logging.exception("[ChunkManager] %s", err) + raise e + async def list_chunk( req: ListChunkRequest, ) -> Tuple[int, List[ChunkEntity]]: @@ -103,7 +136,6 @@ class ChunkManager(): stmt = stmt.where(ChunkEntity.type.in_([t.value for t in req.types])) count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() - print(total) stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) stmt = stmt.order_by(ChunkEntity.global_offset) result = await session.execute(stmt) diff --git a/data_chain/manager/document_manager.py b/data_chain/manager/document_manager.py index 77671b8..3e54777 100644 --- a/data_chain/manager/document_manager.py +++ b/data_chain/manager/document_manager.py @@ -107,6 +107,21 @@ class DocumentManager(): logging.exception("[DocumentManager] %s", err) raise e + @staticmethod + async def get_doc_cnt_by_kb_id(kb_id: uuid.UUID) -> int: + """根据知识库ID获取文档数量""" + try: + async with await DataBase.get_session() as session: + stmt = select(func.count()).select_from(DocumentEntity).where( + and_(DocumentEntity.kb_id == kb_id, + DocumentEntity.status != DocumentStatus.DELETED.value)) + result = await session.execute(stmt) + return result.scalar() + except Exception as e: + err = "获取文档数量失败" + logging.exception("[DocumentManager] %s", err) + raise e + @staticmethod async def list_document(req: ListDocumentRequest) -> tuple[int, List[DocumentEntity]]: """ diff --git a/data_chain/manager/testing_manager.py b/data_chain/manager/testing_manager.py index 0942f66..09283c6 100644 --- a/data_chain/manager/testing_manager.py +++ b/data_chain/manager/testing_manager.py @@ -33,7 +33,7 @@ class TestingManager(): async with await DataBase.get_session() as session: stmt = ( select(TestingEntity) - .where(TestingEntity.testing_id == testing_id) + .where(TestingEntity.id == testing_id) ) result = await session.execute(stmt) return result.scalars().first() @@ -120,14 +120,14 @@ class 
TestingManager(): async with await DataBase.get_session() as session: stmt = ( update(TestingEntity) - .where(TestingEntity.testing_id == testing_id) + .where(TestingEntity.id == testing_id) .values(**testing_dict) ) await session.execute(stmt) await session.commit() stmt = ( select(TestingEntity) - .where(TestingEntity.testing_id == testing_id) + .where(TestingEntity.id == testing_id) ) result = await session.execute(stmt) return result.scalars().first() @@ -145,14 +145,14 @@ class TestingManager(): async with await DataBase.get_session() as session: stmt = ( update(TestingEntity) - .where(TestingEntity.testing_id.in_(testing_ids)) + .where(TestingEntity.id.in_(testing_ids)) .values(**testing_dict) ).returning(TestingEntity) await session.execute(stmt) await session.commit() stmt = ( select(TestingEntity) - .where(TestingEntity.testing_id.in_(testing_ids)) + .where(TestingEntity.id.in_(testing_ids)) ) result = await session.execute(stmt) return result.scalars().all() diff --git a/data_chain/parser/handler/pdf_parser.py b/data_chain/parser/handler/pdf_parser.py index 5cfe762..a17d5dd 100644 --- a/data_chain/parser/handler/pdf_parser.py +++ b/data_chain/parser/handler/pdf_parser.py @@ -84,7 +84,6 @@ class PdfParser(BaseParser): node_with_bbox = ParseNodeWithBbox( node=ParseNode( id=uuid.uuid4(), - lv=0, parse_topology_type=ChunkParseTopology.GRAPHNODE, content=row, diff --git a/data_chain/rag/doc2chunk_bfs_searcher.py b/data_chain/rag/doc2chunk_bfs_searcher.py index ac6e8ca..d30f21b 100644 --- a/data_chain/rag/doc2chunk_bfs_searcher.py +++ b/data_chain/rag/doc2chunk_bfs_searcher.py @@ -12,11 +12,11 @@ from data_chain.entities.enum import SearchMethod from data_chain.entities.enum import ChunkParseTopology -class Doc2Chunk(BaseModel): +class Doc2ChunkBfsSearcher(BaseSearcher): """ 关键词向量检索 """ - name = SearchMethod.DOC2CHUNK.value + name = SearchMethod.DOC2CHUNK_BFS.value @staticmethod async def search( diff --git a/data_chain/rag/doc2chunk_searcher.py 
b/data_chain/rag/doc2chunk_searcher.py index 470ce73..4376702 100644 --- a/data_chain/rag/doc2chunk_searcher.py +++ b/data_chain/rag/doc2chunk_searcher.py @@ -11,7 +11,7 @@ from data_chain.embedding.embedding import Embedding from data_chain.entities.enum import SearchMethod -class Doc2Chunk(BaseModel): +class Doc2ChunkSearcher(BaseSearcher): """ 关键词向量检索 """ diff --git a/data_chain/stores/database/database.py b/data_chain/stores/database/database.py index dbeb4ab..fc12f2c 100644 --- a/data_chain/stores/database/database.py +++ b/data_chain/stores/database/database.py @@ -19,11 +19,13 @@ from data_chain.entities.enum import (Tokenizer, DocParseRelutTopology, DocumentStatus, ChunkType, + ChunkStatus, ImageStatus, ChunkParseTopology, DataSetStatus, QAStatus, TestingStatus, + TestCaseStatus, SearchMethod, TaskType, TaskStatus) @@ -311,7 +313,7 @@ class ChunkEntity(Base): global_offset = Column(Integer) # chunk在文档中的相对偏移 local_offset = Column(Integer) # chunk在块中的相对偏移 enabled = Column(Boolean) # chunk是否启用 - status = Column(String) # chunk状态 + status = Column(String, default=ChunkStatus.EXISTED.value) # chunk状态 created_time = Column( TIMESTAMP(timezone=True), nullable=True, @@ -478,7 +480,7 @@ class TestCaseEntity(Base): lcs = Column(Float) # 最长公共子序列得分 leve = Column(Float) # 编辑距离得分 jac = Column(Float) # 杰卡德相似系数 - status = Column(String) # 测试状态 + status = Column(String, default=TestCaseStatus.EXISTED.value) # 测试状态 created_at = Column( TIMESTAMP(timezone=True), nullable=True, diff --git a/witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx b/witchaind-dataset-import/9233c762-c7c5-43f4-8aa0-372273039622/188455f8-fce3-4f87-8f6f-59e266af7d91.xlsx deleted file mode 100644 index d9c74775ef4624786f63a1c67490fc0bf9100cf7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5911 zcmZ`-1yodP*B(kGgrRd#Qk0={2r21SkZwj8x`d%aN)Q}EIwTd5(s4-X5DNApNJmR?cqF-wyZ0qcvK1i6YGKmZrrvzlG+OQUmWH5GlCxRBY2!B`0~U@24gQ 
zlS;@#ZqCx2C8M+1M8s4;WcB$#zPL6&fvR4cqJ?l!WG?Qv(;$r{H?`x6YQoO@blETx z?bW;qmZ6NYCZ_v&N=>-nd0A^mE94M9Wtc}h{nyzAo3&>t{XrnHUx_&nIL$W&`Le=G zgE`GZNZFGc2yLn6jW5W>y@E0mze4 z?OL?}qbVteI|fGkh7`sWZ^rh%)KNbE;jdOIbza)cO;h{%YV4SKOn7IVdxGcR?4%lJ zumEoW02%iH0CEgFa0ea_Tc{)S&&cVY_fJ3Q6t`^PEJc zhHh%y2_w~yaqzZ|3k_^g6b5!D-QJ8>uSu7{1+Sj<$}29fhZ0a`@;JITpfeuPBdQiW4=j~LGy(ebd1tRW|- zo`%94a%dE-&HYWmw$gn%+J??%cackX1db`Ybz-1-Smb&#?A4LTy%UGF^NHz)Q1q%8 zh(FkgmtOyFV6XGLMGeEqIi2R#Mmrm+r}iEF14=<5o&Jei2s3 z9ky;}zv`Os5Ok!QUD$<-qPhp7T^Mj@wQIAQGk-ZWU)8~nFAgNmAMjvp{ajG*s-(W1MR_AU58F@&k@N7=mfA@^V$RgsNlQ_AX7=w+hANuexu7Hgny?sKkt|4J_=V?;8 z2^zha^~#NuWiDk5?@iX&iq5`XY_BXDYMN-i(%D4H+rEX=0r`TyRVCUoW zCFO)4Bilcc&XW6A*5CRz6BBtQYs zhnBR$bZ%d$f>EwTq2`8U?fr0#0VL_U!AGHlhvwfldg7kHj@#+!;@zU_@hkTx)e zi>jn;=DOg5F8RO-M3UO;v`RyKv99_jM4F3_LmDB0-2Ui-?do5WIRlj467G*dvKu!pY#(OrZk~>_hL)unaIsJ!RD(au zuvVQ%ckMiA#b2>DV#Dv&8H9>Nk!8u!JFPxBPmyp`0k>?*i9sEBM}cmdA$V*ogJEZF0vHxK1r6VqLZ!1ajePw zAtsuY5?4(y_!!~baA~wFD^~9X_&x6)MfEMt8v!v)1NZZ*RUR)5hL=tZU9^)Q-c6$n{fuCBNdb zoNR2i&>z1rSqS~&RCeEs){*%!q#2e%Zd2Sf4XtR>1)+6~_R8z+B$1RF=u3l=Z`nko z8dQX2N>+)~g(|uGv`^}nIh9*=MH}P=P-Mw8bqhZ`Ts59lk$Iv>u=%TNAw@n=KAXb5 z87?Axuu-G|*}S^R=J0ZQMyU_8=V`K6llk6-OQ#~qlK8UOFB7@afWxVUQ>O%RCAAFi zHc~7Pd2E(93^%B#`4_de;t@B1Io5miov$3*S6u)UpN=k;pz6dddrs>5$ROx-2-vPn|FO>RtwZNGGaoiJ>2UZKQDEb@pFWGx=&TJ{KqT(5ylh#8HmfI*ffsmPjawa zen69dR8$aYv1NTtndS*whB(r9H~4P)`N(Ac;nXaV-%zn(fi)+ol=(g?QZ9#@OJX5|@ zlyKgn27$zn%kA6c&8@L>o6!we|6V|(bd9|P@Bn}=Isky?&jRA*>}Kz73x#^P^Za%D zvx*p~I!`J9>(BIn{D>!3o4xfc2GSe3N`w@=^ACfjZDEe2aY_;S>x(m9o;J#HQOuPQ z6kKlyBfGr=_Qxt9fu(CCJrifbXUM64 z75-w?@-jv0*l6bR6#isk5IOqk0jl01>ce-tz(9z1_l&XJ>}#S1%FAln^Iln2FWmzs ziSw$OjPhB;%rTT+a8c><*J!|J{HVT|*UHQJBDA;>4dRtNgeEd~~#sDB9 zOssg6IUFpe6->ZxElRC)H^aVD_pPxiZ7-y9*C6se&&qQgh#~cE|pjv z&LLj$(5b*tFQ7`gOKBX-UhrIhskzLZN=r{wR$kY;d1{d~y!zt4%j@E1l`PZ6bz{fq znF}2=V6rNAh1`zuofnF{knX;Gx7hReP{L%5FzR5ro@09icCT3QaaLHBPc^Y7Z_-ZA z@u8~%gmM=#BGZZnZW(h3i(3Z?sNA|0Ded0Y^9{hyNIAs(EQ)yz$4x^Z;E|f>4YMfa 
zL&Mnoloq#dAy-uU8Jqi8yJY(TyuP-|?BCzk@V4;{pACZ(D}H^isOb&&j)^xI(S0V& z`!g|zEXnWHT^dELTRpKX9EvC;RfYqkaH%f|P@*Jc>-GMkeBLTuW$9Z!`;7G37;SUA zgE=pMmpJ+7M=W#0)p*3>vb+evZ2xs8-jGHmMWB||FJK0gmTeA?p~f{gET`3|?J9UV zg~C2k3<3tm_Vqn_=@>Bqf50#hAzdVQ=$g+e_2j-1F^*)WqQ0mh+c57w!Fd3?~ID8+AC@1*pM~33>|)NCt{kZ1^`fO<6fs{ zT2Z*@;pmpb+bOayIt`?ER6^;7EJjU05=pn`Dg^Q46+y4X>BZ)LL>6&wEQIGu=q6XB zwa3ti44GoxViAoYPsZLbAMbwi>As${Zc&C>fRZ@!b>LK_HcntkXSH;)7x^5_ z!WeP!rdaU^3MQlBX|4-+BGYi0V9Jkrf(&43a=KTNf=#uE!_DycPdaoSI0quRLllwG zjuSH86V{sP8?XdP$4~Fn50RZ&!sCPNzJI;Ea!gT}Qj)YTjn&qPCC zwW!i)h>o#F5OQr6fGgi*Ft9 zZ_Z&z74^y(JrwMYh~h|q_?(Uc&@BX?85WR_>7#oEIYMWqsxEHqyk-F^nL zT@D>K(uc=-)d!E+He`q^ZJM8W*B_@C<6mMii(7G8Y+Hxee|?(oGV70q<oY@uk8`6KTp-D0==Cty2U1+V$gL?;6cGkN17loI=>0OZVY3*@&1DVjBMBmg#{_2StdA0~t zil?LnTf2$g8GY}pM79&Rt+L6>g!jS=1B{;2*5*NL$GiJyK@J!9Vzn}Y+DKbDKo4-D z$n_I@uROBYyt9I|tGV*N#V>-DHO$)(F;|bpTs5$=kCmgDyZ16atUM#{F_#@AMFzf$uFs}yfLDK4=!DL&MNokFzZvL$#)Lsf(imb9fyd))iNt)q>s_c ziL0#+LF^AJwpXr}$V;J+PLhoPz`v9CnRhBSKNbLR7c)_kKgs*MOR{u!w*Q^JG1~nq zEz-dAH!gR~2XgUL2dSeCfm}~d1Js3k25Kj(Dq&yDw%M8VciP^4%`^y4t!_&_uDU12 zZZ;j3l5Uuv`VI!R+Yz4D!jVah8Z+vDLjrT4aiFya>~d#58wVdyy)fWbD7pP^ln|dH zgo0ut2-FO@+?z_v;GJg#b$L0RR^lYUiB@*8p-%h>t!cgJR(Q_c5l2GyWj6uZNKm=? 
z?#*dWcBzerhmm6!wYn_Tsn7@dOp2>KKK{jwnr}N=MhGNx$!FhC!Y%FSi6S%v3AL90@^#|Du)~((lw1dU^)RN#GQ}0Ob%MzbwWm$ z%$+=RWW%t!^eA6O*^9S@Cpi5Q8=S1|{EF%mJ9;~21ZOge#p*yCTwbG>l`%cH1VigF zhGI+~`dh{L6H5Oi`LD405+_Ogf|n?)jBE)Yq*Ww`r)Hwh$qmHYXCWhNDG6EGT2k(Ne%h!9 zNiK$W=R5B@!k+qf~XjjD=w{~AXgFh(zL+p%}R;1m|>&O#C#X6+$0^I*!!;wNpx1&MHM)AZPxoe z_|)06cweDw$IqLd5q#@XCm@AX;0AVNvUE=ATC*rw7nZ@9J{@eF$!Hk8sE_MexFCXw zECyTZ|3n}Awo_lx_wJH5Z#76&)QxD+1^$ellQIp&Fo}gtf%E^1n3$OP>j=W|`+p_P z>*(u};vXyk5QJ6!PxSu^jMw4UHMhUvy_nzs7X|LRf$Mv&{}>1+x%HQU|GERaZsmGI z@gFN#a7=%JapUjy;yUzt-ToV@hLME-QN^zVuh+r9fkGH<7W2=4ChT?a^(_AzoJjT; p`2WfI>vpau(%*KxF%sy1Nh>W?Jd8vN0N`WJhnNc9@;i_K{{!_th>8FJ diff --git a/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml b/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml deleted file mode 100644 index a9db39b..0000000 --- a/witchaind-dataset-import/dd3471bd-33bc-4075-9fa1-ab8d236e35b1/188455f8-fce3-4f87-8f6f-59e266af7d91.yaml +++ /dev/null @@ -1,13 +0,0 @@ -- answer: 《图书出版的艺术和科学》的作者是小赫伯特·S·贝利(Herbert·S·Bailey,Jr.)。 - chunk: "【 文献号 】1-2432\n【原文出处】出版发行研究\n【原刊地名】京\n【原刊期号】199601\n【原刊页号】40-41\n【分 类 号】Z1\n\ - 【分 类 名】出版工作、图书评介\n【 作 者 】王益\n【复印期号】199604\n【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》\n\ - 【 正 文 】\n 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo\n\ - f Book Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林\n\ - 斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199\n0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的\n\ - 基本原则仍是正确的,因而出版时基本上没有修改。【 文献号 】1-2432\n【原文出处】出版发行研究\n【原刊地名】京\n【原刊期号】199601\n【原刊页号】40-41\n\ - 【分 类 号】Z1\n【分 类 名】出版工作、图书评介\n【 作 者 】王益\n【复印期号】199604\n【 标 题 】美国出版社怎样经营管理?——介绍《图书出版的艺术和科学》\n\ - 【 正 文 】\n 美国出版的有关图书出版的专业书籍,概论性的有好几本,专讲经营管理的并不多,《图书出版的艺术和科学》(The Art and Scienceo\n\ - f Book Pblishing)是经常被人推荐的一本。作者小赫伯特·S·贝利(Herbert·S·Bailey,Jr.),在大学文学系毕业之后,1946年进入普林\n\ - 
斯顿大学出版社当了8年编辑,1954年出任该出版社社长, 直至1986年退休,从事出版工作共40年。1970年,他写了这本书,1980年再版,199\n0年三版。这本书,广泛地被大学采用作出版课程教材,也被出版社工作人员选作参考读物。从1970年至1990年,时隔20年,他认为他所阐述的\n\ - 基本原则仍是正确的,因而出版时基本上没有修改。有关出版工作的方方面面,它都涉及到了。" - question: 《图书出版的艺术和科学》的作者是谁? -- Gitee From 4b4f65ae564d95eb3222da20dc8fec913ca09353 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 18:10:46 +0800 Subject: [PATCH 12/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=87=86=E7=A1=AE?= =?UTF-8?q?=E7=8E=87=E6=B5=8B=E8=AF=95=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../base/task/worker/acc_testing_worker.py | 18 +- data_chain/parser/tools/token_tool.py | 167 ++++++++++-------- 2 files changed, 96 insertions(+), 89 deletions(-) diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py b/data_chain/apps/base/task/worker/acc_testing_worker.py index 6be16f0..6eb4ab6 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -136,7 +136,7 @@ class TestingWorker(BaseWorker): testcase_entities = [] with open(config['PROMPT_PATH'], 'r', encoding='utf-8') as f: prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - prompt_template = prompt_dict.get('INTENT_DETECT_PROMPT_TEMPLATE', '') + prompt_template = prompt_dict.get('LLM_PROMPT_TEMPLAT', '') for qa_entity in qa_entities: question = qa_entity.question answer = qa_entity.answer @@ -157,11 +157,9 @@ class TestingWorker(BaseWorker): bac_info += sub_bac_info+'\n' bac_info = TokenTool.get_k_tokens_words_from_content(bac_info, llm.max_tokens) prompt = prompt_template.format( - history='', - question=question, bac_info=bac_info ) - llm_answer = await llm.nostream([], prompt, '请输出答案') + llm_answer = await llm.nostream([], prompt, question) sub_socres = [] pre = await TokenTool.cal_precision(question, bac_info, llm) if pre: @@ -184,20 +182,14 @@ class TestingWorker(BaseWorker): else: rel = -1 lcs = TokenTool.cal_lcs(answer, llm_answer) - if lcs: + 
if lcs != -1: sub_socres.append(lcs) - else: - lcs = -1 leve = TokenTool.cal_leve(answer, llm_answer) - if leve: + if leve != -1: sub_socres.append(leve) - else: - leve = -1 jac = TokenTool.cal_jac(answer, llm_answer) - if jac: + if jac != -1: sub_socres.append(jac) - else: - jac = -1 score = -1 if sub_socres: score = sum(sub_socres) / len(sub_socres) diff --git a/data_chain/parser/tools/token_tool.py b/data_chain/parser/tools/token_tool.py index 3adc254..c7546fd 100644 --- a/data_chain/parser/tools/token_tool.py +++ b/data_chain/parser/tools/token_tool.py @@ -417,91 +417,106 @@ class TokenTool: """ 计算两个字符串的最长公共子序列长度得分 """ - words1 = TokenTool.split_words(str1) - words2 = TokenTool.split_words(str2) - new_words1 = [] - new_words2 = [] - for word in words1: - if word not in TokenTool.stopwords: - new_words1.append(word) - for word in words2: - if word not in TokenTool.stopwords: - new_words2.append(word) - if len(new_words1) == 0 and len(new_words2) == 0: - return 100 - if len(new_words1) == 0 or len(new_words2) == 0: - return 0 - m = len(new_words1) - n = len(new_words2) - dp = np.zeros((m+1, n+1)) - for i in range(1, m+1): - for j in range(1, n+1): - if new_words1[i-1] == new_words2[j-1]: - dp[i][j] = dp[i-1][j-1] + 1 - else: - dp[i][j] = max(dp[i-1][j], dp[i][j-1]) - lcs_length = dp[m][n] - score = lcs_length / min(len(new_words1), len(new_words2)) * 100 - return score + try: + words1 = TokenTool.split_words(str1) + words2 = TokenTool.split_words(str2) + new_words1 = [] + new_words2 = [] + for word in words1: + if word not in TokenTool.stopwords: + new_words1.append(word) + for word in words2: + if word not in TokenTool.stopwords: + new_words2.append(word) + if len(new_words1) == 0 and len(new_words2) == 0: + return 100 + if len(new_words1) == 0 or len(new_words2) == 0: + return 0 + m = len(new_words1) + n = len(new_words2) + dp = np.zeros((m+1, n+1)) + for i in range(1, m+1): + for j in range(1, n+1): + if new_words1[i-1] == new_words2[j-1]: + dp[i][j] = 
dp[i-1][j-1] + 1 + else: + dp[i][j] = max(dp[i-1][j], dp[i][j-1]) + lcs_length = dp[m][n] + score = lcs_length / min(len(new_words1), len(new_words2)) * 100 + return score + except Exception as e: + err = f"[TokenTool] 计算lcs失败 {e}" + logging.exception("[TokenTool] %s", err) + return -1 @staticmethod def cal_leve(str1: str, str2: str) -> float: """ 计算两个字符串的编辑距离 """ - words1 = TokenTool.split_words(str1) - words2 = TokenTool.split_words(str2) - new_words1 = [] - new_words2 = [] - for word in words1: - if word not in TokenTool.stopwords: - new_words1.append(word) - for word in words2: - if word not in TokenTool.stopwords: - new_words2.append(word) - if len(new_words1) == 0 and len(new_words2) == 0: - return 100 - if len(new_words1) == 0 or len(new_words2) == 0: - return 0 - m = len(new_words1) - n = len(new_words2) - dp = np.zeros((m+1, n+1)) - for i in range(m+1): - dp[i][0] = i - for j in range(n+1): - dp[0][j] = j - for i in range(1, m+1): - for j in range(1, n+1): - if new_words1[i-1] == new_words2[j-1]: - dp[i][j] = dp[i-1][j-1] - else: - dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]+1) - edit_distance = dp[m][n] - score = (1 - edit_distance / max(len(new_words1), len(new_words2))) * 100 - return score + try: + words1 = TokenTool.split_words(str1) + words2 = TokenTool.split_words(str2) + new_words1 = [] + new_words2 = [] + for word in words1: + if word not in TokenTool.stopwords: + new_words1.append(word) + for word in words2: + if word not in TokenTool.stopwords: + new_words2.append(word) + if len(new_words1) == 0 and len(new_words2) == 0: + return 100 + if len(new_words1) == 0 or len(new_words2) == 0: + return 0 + m = len(new_words1) + n = len(new_words2) + dp = np.zeros((m+1, n+1)) + for i in range(m+1): + dp[i][0] = i + for j in range(n+1): + dp[0][j] = j + for i in range(1, m+1): + for j in range(1, n+1): + if new_words1[i-1] == new_words2[j-1]: + dp[i][j] = dp[i-1][j-1] + else: + dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]+1) + 
edit_distance = dp[m][n] + score = (1 - edit_distance / max(len(new_words1), len(new_words2))) * 100 + return score + except Exception as e: + err = f"[TokenTool] 计算leve失败 {e}" + logging.exception("[TokenTool] %s", err) + return -1 @staticmethod def cal_jac(str1: str, str2: str) -> float: """ 计算两个字符串的Jaccard相似度 """ - if len(str1) == 0 and len(str2) == 0: - return 100 - words1 = TokenTool.split_words(str1) - words2 = TokenTool.split_words(str2) - new_words1 = [] - new_words2 = [] - for word in words1: - if word not in TokenTool.stopwords: - new_words1.append(word) - for word in words2: - if word not in TokenTool.stopwords: - new_words2.append(word) - if len(new_words1) == 0 or len(new_words2) == 0: - return 0 - set1 = set(new_words1) - set2 = set(new_words2) - intersection = len(set1.intersection(set2)) - union = len(set1.union(set2)) - score = intersection / union * 100 - return score + try: + if len(str1) == 0 and len(str2) == 0: + return 100 + words1 = TokenTool.split_words(str1) + words2 = TokenTool.split_words(str2) + new_words1 = [] + new_words2 = [] + for word in words1: + if word not in TokenTool.stopwords: + new_words1.append(word) + for word in words2: + if word not in TokenTool.stopwords: + new_words2.append(word) + if len(new_words1) == 0 or len(new_words2) == 0: + return 0 + set1 = set(new_words1) + set2 = set(new_words2) + intersection = len(set1.intersection(set2)) + union = len(set1.union(set2)) + score = intersection / union * 100 + return score + except Exception as e: + err = f"[TokenTool] 计算jac失败 {e}" + logging.exception("[TokenTool] %s", err) + return -1 \ No newline at end of file -- Gitee From 064cf7a527eb7d2515fab173ba5e104ca30e4035 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 21:30:39 +0800 Subject: [PATCH 13/15] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=88=9B=E5=BB=BA?= =?UTF-8?q?=E5=9B=A2=E9=98=9F=E6=97=B6=E5=80=99pulic=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
data_chain/apps/base/convertor.py | 1 + .../base/task/worker/acc_testing_worker.py | 17 +++----- .../base/task/worker/export_dataset_worker.py | 2 + .../task/worker/generate_dataset_worker.py | 6 ++- data_chain/apps/router/dataset.py | 8 ++-- .../apps/service/acc_testing_service.py | 4 +- data_chain/apps/service/chunk_service.py | 4 +- data_chain/apps/service/dataset_service.py | 8 ++-- data_chain/apps/service/document_service.py | 2 +- data_chain/apps/service/session_service.py | 5 +-- data_chain/common/prompt.yaml | 1 + data_chain/entities/common.py | 18 ++++++++ data_chain/entities/request_data.py | 4 +- data_chain/entities/response_data.py | 1 + data_chain/manager/dataset_manager.py | 3 +- data_chain/manager/knowledge_manager.py | 3 +- data_chain/manager/qa_manager.py | 3 +- data_chain/manager/testing_manager.py | 42 +++++++++++-------- data_chain/parser/tools/token_tool.py | 4 ++ data_chain/rag/base_searcher.py | 5 ++- 20 files changed, 87 insertions(+), 54 deletions(-) diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index f5d1dc8..703a15a 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -552,6 +552,7 @@ class Convertor: aveLeve=testing_entity.ave_leve, aveJac=testing_entity.ave_jac, authorName=testing_entity.author_name, + topk=testing_entity.top_k, status=TestingStatus(testing_entity.status), ) return testing diff --git a/data_chain/apps/base/task/worker/acc_testing_worker.py b/data_chain/apps/base/task/worker/acc_testing_worker.py index 6eb4ab6..454cb4f 100644 --- a/data_chain/apps/base/task/worker/acc_testing_worker.py +++ b/data_chain/apps/base/task/worker/acc_testing_worker.py @@ -162,25 +162,17 @@ class TestingWorker(BaseWorker): llm_answer = await llm.nostream([], prompt, question) sub_socres = [] pre = await TokenTool.cal_precision(question, bac_info, llm) - if pre: + if pre != -1: sub_socres.append(pre) - else: - pre = -1 rec = await TokenTool.cal_recall(answer, llm_answer, 
llm) - if rec: + if rec != -1: sub_socres.append(rec) - else: - rec = -1 fai = await TokenTool.cal_faithfulness(question, llm_answer, bac_info, llm) - if fai: + if fai != -1: sub_socres.append(fai) - else: - fai = -1 rel = await TokenTool.cal_relevance(question, llm_answer, llm) - if rel: + if rel != -1: sub_socres.append(rel) - else: - rel = -1 lcs = TokenTool.cal_lcs(answer, llm_answer) if lcs != -1: sub_socres.append(lcs) @@ -378,6 +370,7 @@ class TestingWorker(BaseWorker): err = f"[TestingWorker] 测试不存在,测试ID: {task_id}" logging.exception(err) raise Exception(err) + await TestingManager.update_testing_by_testing_id(testing_entity.id, {"status": TestingStatus.RUNNING.value}) current_stage = 0 stage_cnt = 4 llm = LLM( diff --git a/data_chain/apps/base/task/worker/export_dataset_worker.py b/data_chain/apps/base/task/worker/export_dataset_worker.py index 94f71ee..ee7af1a 100644 --- a/data_chain/apps/base/task/worker/export_dataset_worker.py +++ b/data_chain/apps/base/task/worker/export_dataset_worker.py @@ -159,6 +159,8 @@ class ExportDataSetWorker(BaseWorker): err = f"[ExportDataSetWorker] 数据集不存在,数据集ID: {task_id}" logging.exception(err) raise Exception(err) + await DatasetManager.update_dataset_by_dataset_id( + dataset_entity.id, {"status": DataSetStatus.EXPORTING.value}) current_stage = 0 stage_cnt = 3 tmp_path, source_path, target_path = await ExportDataSetWorker.init_path(task_id) diff --git a/data_chain/apps/base/task/worker/generate_dataset_worker.py b/data_chain/apps/base/task/worker/generate_dataset_worker.py index d75f5cc..2dc0be7 100644 --- a/data_chain/apps/base/task/worker/generate_dataset_worker.py +++ b/data_chain/apps/base/task/worker/generate_dataset_worker.py @@ -137,8 +137,6 @@ class GenerateDataSetWorker(BaseWorker): for i in range(len(doc_chunks)): doc_chunk = doc_chunks[i] for j in range(len(doc_chunk.chunks)): - if data_cnt == 0: - break chunk = doc_chunk.chunks[j].text if dataset_entity.is_chunk_related: l = i-1 @@ -242,7 +240,11 @@ class 
GenerateDataSetWorker(BaseWorker): chunk=chunk, chunk_type=doc_chunk.chunks[i].type) qa_entities.append(qa_entity) + if len(qa_entities) >= dataset_entity.data_cnt: + break index += 1 + if len(qa_entities) >= dataset_entity.data_cnt: + break if len(qa_entities) > 0: dataset_score = dataset_score / len(qa_entities) await DatasetManager.update_dataset_by_dataset_id( diff --git a/data_chain/apps/router/dataset.py b/data_chain/apps/router/dataset.py index bda6270..fdac80a 100644 --- a/data_chain/apps/router/dataset.py +++ b/data_chain/apps/router/dataset.py @@ -145,9 +145,10 @@ async def generate_dataset_by_id( @router.put('', response_model=UpdateDatasetResponse, dependencies=[Depends(verify_user)]) async def update_dataset_by_dataset_id( user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], database_id: Annotated[UUID, Query(alias="databaseId")], req: Annotated[UpdateDatasetRequest, Body(...)]): - if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id)): + if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id, action)): raise Exception("用户没有权限访问该数据集") await DataSetService.update_dataset_by_dataset_id(database_id, req) return UpdateDatasetResponse() @@ -168,9 +169,10 @@ async def update_data_by_dataset_id( @router.delete('', response_model=DeleteDatasetResponse, dependencies=[Depends(verify_user)]) async def delete_dataset_by_dataset_ids( user_sub: Annotated[str, Depends(get_user_sub)], + action: Annotated[str, Depends(get_route_info)], database_ids: Annotated[list[UUID], Body(alias="databaseId")]): for database_id in database_ids: - if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id)): + if not (await DataSetService.validate_user_action_to_dataset(user_sub, database_id, action)): raise Exception("用户没有权限访问该数据集") dataset_ids = await DataSetService.delete_dataset_by_dataset_ids(database_ids) return 
DeleteDatasetResponse(result=dataset_ids) @@ -182,7 +184,7 @@ async def delete_data_by_data_ids( action: Annotated[str, Depends(get_route_info)], data_ids: Annotated[list[UUID], Body(alias="dataIds")]): for data_id in data_ids: - if not (await DataSetService.validate_user_action_to_dataset(user_sub, data_id, action)): + if not (await DataSetService.validate_user_action_to_data(user_sub, data_id, action)): raise Exception("用户没有权限访问该数据集的数据") await DataSetService.delete_data_by_data_ids(data_ids) return DeleteDataResponse() diff --git a/data_chain/apps/service/acc_testing_service.py b/data_chain/apps/service/acc_testing_service.py index 640d254..2baa399 100644 --- a/data_chain/apps/service/acc_testing_service.py +++ b/data_chain/apps/service/acc_testing_service.py @@ -53,7 +53,9 @@ class TestingService: async def list_testing_by_kb_id(req: ListTestingRequest) -> ListTestingMsg: """根据知识库ID查询测试""" try: - total, dataset_ids = await TestingManager.list_testing_unique_dataset_ids(req) + total, dataset_entities = await TestingManager.list_testing_unique_datasets(req) + dataset_entities.sort(key=lambda x: x.created_at, reverse=True) + dataset_ids = [dataset_entity.id for dataset_entity in dataset_entities] dataset_entities = await DatasetManager.list_datasets_by_dataset_ids(dataset_ids) dataset_dict = {dataset_entity.id: dataset_entity for dataset_entity in dataset_entities} dataset_testings = [] diff --git a/data_chain/apps/service/chunk_service.py b/data_chain/apps/service/chunk_service.py index 620e7c3..6098afe 100644 --- a/data_chain/apps/service/chunk_service.py +++ b/data_chain/apps/service/chunk_service.py @@ -124,8 +124,8 @@ class ChunkService: chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) if req.is_compress: chunk.text = TokenTool.compress_tokens(chunk.text) - dc = DocChunk(doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[chunk]) - search_chunk_msg.docChunks.append(dc) + dc = DocChunk(docId=chunk_entity.doc_id, 
docName=chunk_entity.doc_name, chunks=[chunk]) + search_chunk_msg.doc_chunks.append(dc) return search_chunk_msg async def update_chunk_by_id(chunk_id: uuid.UUID, req: UpdateChunkRequest) -> uuid.UUID: diff --git a/data_chain/apps/service/dataset_service.py b/data_chain/apps/service/dataset_service.py index a0943a5..0a4e376 100644 --- a/data_chain/apps/service/dataset_service.py +++ b/data_chain/apps/service/dataset_service.py @@ -94,7 +94,7 @@ class DataSetService: dataset.llm = llm task_entity = task_dict.get(dataset_entity.id, None) if task_entity: - task_report = task_report_dict.get(task_entity.id, None) if task_entity else None + task_report = task_report_dict.get(task_entity.id, None) task = await Convertor.convert_task_entity_to_task(task_entity, task_report) dataset.generate_task = task datasets.append(dataset) @@ -124,9 +124,9 @@ class DataSetService: """判断数据集是否有测试数据""" try: dataset_entity = await TestingManager.list_testing_by_dataset_id(dataset_id) - if dataset_entity is None: - return False - return True + if dataset_entity: + return True + return False except Exception as e: err = "判断数据集是否有测试数据失败" logging.exception("[DataSetService] %s", err) diff --git a/data_chain/apps/service/document_service.py b/data_chain/apps/service/document_service.py index 7ec0cef..5ec66da 100644 --- a/data_chain/apps/service/document_service.py +++ b/data_chain/apps/service/document_service.py @@ -125,7 +125,7 @@ class DocumentService: task_report = '' for task_report_entity in task_report_entities: task_report += f"任务报告ID: {task_report_entity.id}, " \ - f"任务报告内容: {task_report_entity.content}, " \ + f"任务报告内容: {task_report_entity.message}, " \ f"任务报告创建时间: {task_report_entity.created_time}\n" return task_report except Exception as e: diff --git a/data_chain/apps/service/session_service.py b/data_chain/apps/service/session_service.py index 0f0c959..712bae4 100644 --- a/data_chain/apps/service/session_service.py +++ b/data_chain/apps/service/session_service.py @@ -44,10 +44,9 @@ 
async def verify_user(request: HTTPConnection): async def get_user_sub(request: HTTPConnection) -> uuid: """从Session中获取用户""" - await UserManager.add_user((await Convertor.convert_user_sub_to_user_entity('admin'))) - return "admin" if config["DEBUG"]: - user_sub = "admin" + await UserManager.add_user((await Convertor.convert_user_sub_to_user_entity('admin'))) + return "admin" else: try: session_id = None diff --git a/data_chain/common/prompt.yaml b/data_chain/common/prompt.yaml index 36b2230..d197af2 100644 --- a/data_chain/common/prompt.yaml +++ b/data_chain/common/prompt.yaml @@ -128,6 +128,7 @@ STATEMENTS_TO_QUESTION_PROMPT: '你是一个文本分析专家,你的任务是 #01 如果陈诉是否与问题相关,请输出YES #02 如果陈诉与问题不相关,请输出NO #03 请仅输出YES或NO,不要输出其他内容 + #04 陈诉与问题相关是指,陈诉中的内容可以回答问题或者与问题在内容上有交际 例子: 输入1: 陈诉:openEuler是一个开源的操作系统。 diff --git a/data_chain/entities/common.py b/data_chain/entities/common.py index 2e48ed2..ee1b716 100644 --- a/data_chain/entities/common.py +++ b/data_chain/entities/common.py @@ -58,6 +58,8 @@ actions = [ 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, {'type': 'dataset_data', 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'dataset_data', + 'name': '获取测试数据下是否有测试任务', 'action': 'GET /dataset/testing/exist'}, {'type': 'dataset_data', 'name': '下载数据集', 'action': 'GET /dataset/download'}, {'type': 'dataset_data', @@ -70,8 +72,12 @@ actions = [ 'name': '生成数据集', 'action': 'POST /dataset/generate'}, {'type': 'dataset_data', 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '修改测试样例', 'action': 'PUT /dataset/data'}, {'type': 'dataset_data', 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'dataset_data', + 'name': '删除测试样例', 'action': 'DELETE /dataset/data'}, {'type': 'testing', 'name': '获取测试列表', 'action': 'POST /testing/list'}, {'type': 'testing', @@ -166,6 +172,8 @@ default_roles = [ 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, {'type': 'dataset_data', 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 
'dataset_data', + 'name': '获取测试数据下是否有测试任务', 'action': 'GET /dataset/testing/exist'}, {'type': 'dataset_data', 'name': '下载数据集', 'action': 'GET /dataset/download'}, {'type': 'dataset_data', @@ -178,8 +186,12 @@ default_roles = [ 'name': '生成数据集', 'action': 'POST /dataset/generate'}, {'type': 'dataset_data', 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '修改测试样例', 'action': 'PUT /dataset/data'}, {'type': 'dataset_data', 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'dataset_data', + 'name': '删除测试样例', 'action': 'DELETE /dataset/data'}, {'type': 'testing', 'name': '获取测试列表', 'action': 'POST /testing/list'}, {'type': 'testing', @@ -268,6 +280,8 @@ default_roles = [ 'name': '获取数据集列表', 'action': 'POST /dataset/list'}, {'type': 'dataset_data', 'name': '获取测试数据列表', 'action': 'POST /dataset/data'}, + {'type': 'dataset_data', + 'name': '获取测试数据下是否有测试任务', 'action': 'GET /dataset/testing/exist'}, {'type': 'dataset_data', 'name': '下载数据集', 'action': 'GET /dataset/download'}, {'type': 'dataset_data', @@ -280,8 +294,12 @@ default_roles = [ 'name': '生成数据集', 'action': 'POST /dataset/generate'}, {'type': 'dataset_data', 'name': '修改数据集信息', 'action': 'PUT /dataset'}, + {'type': 'dataset_data', + 'name': '修改测试样例', 'action': 'PUT /dataset/data'}, {'type': 'dataset_data', 'name': '删除数据集', 'action': 'DELETE /dataset'}, + {'type': 'dataset_data', + 'name': '删除测试样例', 'action': 'DELETE /dataset/data'}, {'type': 'testing', 'name': '获取测试列表', 'action': 'POST /testing/list'}, {'type': 'testing', diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index 1ce59d2..e76f3c9 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -50,7 +50,7 @@ class ListTeamUserRequest(BaseModel): class CreateTeamRequest(BaseModel): team_name: str = Field(default='这是一个默认的团队名称', min_length=1, max_length=30, alias="teamName") description: str = Field(default='', max_length=150) - is_public: bool = 
Field(default=False) + is_public: bool = Field(default=False, alias="isPublic") class UpdateTeamRequest(BaseModel): @@ -100,7 +100,7 @@ class UpdateKnowledgeBaseRequest(BaseModel): class ListDocumentRequest(BaseModel): kb_id: uuid.UUID = Field(description="资产id", alias="kbId") - doc_id: Optional[uuid.UUID] = Field(default=None, description="文档id", min_length=1, max_length=30, alias="docId") + doc_id: Optional[uuid.UUID] = Field(default=None, description="文档id", alias="docId") doc_name: Optional[str] = Field(default=None, description="文档名称", alias="docName") doc_type_id: Optional[uuid.UUID] = Field(default=None, description="文档类型id", alias="docTypeId") parse_status: Optional[list[TaskStatus]] = Field(default=None, description="文档解析状态", alias="parseStatus") diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index b953e99..43c7257 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -438,6 +438,7 @@ class Testing(BaseModel): ave_leve: float = Field(default=-1, description="编辑距离得分", alias="aveLeve") # 编辑距离得分 ave_jac: float = Field(default=-1, description="杰卡德相似系数", alias="aveJac") # 杰卡德相似系数 author_name: str = Field(description="测试创建者的用户名", alias="authorName") + topk: int = Field(description="检索到的片段数量", alias="topk") status: TestingStatus = Field(description="测试状态", alias="status") diff --git a/data_chain/manager/dataset_manager.py b/data_chain/manager/dataset_manager.py index a6cdd0c..e835d3d 100644 --- a/data_chain/manager/dataset_manager.py +++ b/data_chain/manager/dataset_manager.py @@ -98,6 +98,7 @@ class DatasetManager: stmt = stmt.where(DataSetEntity.is_chunk_related == req.is_chunk_related) if req.generate_status: stmt = stmt.where(subq.c.status.in_([status.value for status in req.generate_status])) + stmt = stmt.order_by(DataSetEntity.created_at.desc(), DataSetEntity.id.desc()) if req.score_order: if req.score_order == "asc": stmt = stmt.order_by(asc(DataSetEntity.score)) @@ -108,8 
+109,6 @@ class DatasetManager: count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) - stmt = stmt.order_by(DataSetEntity.created_at.desc()) - stmt = stmt.order_by(DataSetEntity.id.desc()) result = await session.execute(stmt) dataset_entities = result.scalars().all() return total, dataset_entities diff --git a/data_chain/manager/knowledge_manager.py b/data_chain/manager/knowledge_manager.py index 4bc32a1..5a3ae54 100644 --- a/data_chain/manager/knowledge_manager.py +++ b/data_chain/manager/knowledge_manager.py @@ -56,8 +56,7 @@ class KnowledgeBaseManager(): count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() stmt = stmt.limit(req.page_size).offset((req.page - 1) * req.page_size) - stmt = stmt.order_by(KnowledgeBaseEntity.created_time.desc()) - stmt = stmt.order_by(KnowledgeBaseEntity.id.desc()) + stmt = stmt.order_by(KnowledgeBaseEntity.created_time.desc(), KnowledgeBaseEntity.id.desc()) result = await session.execute(stmt) knowledge_base_entities = result.scalars().all() return (total, knowledge_base_entities) diff --git a/data_chain/manager/qa_manager.py b/data_chain/manager/qa_manager.py index b671d13..db36655 100644 --- a/data_chain/manager/qa_manager.py +++ b/data_chain/manager/qa_manager.py @@ -88,8 +88,7 @@ class QAManager: count_stmt = select(func.count()).select_from(stmt.subquery()) total = (await session.execute(count_stmt)).scalar() stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) - stmt = stmt.order_by(QAEntity.created_at.desc()) - stmt = stmt.order_by(QAEntity.id.desc()) + stmt = stmt.order_by(QAEntity.created_at.desc(), QAEntity.id.desc()) result = await session.execute(stmt) qa_entities = result.scalars().all() return total, qa_entities diff --git a/data_chain/manager/testing_manager.py b/data_chain/manager/testing_manager.py index 
09283c6..f29fbfb 100644 --- a/data_chain/manager/testing_manager.py +++ b/data_chain/manager/testing_manager.py @@ -62,23 +62,28 @@ class TestingManager(): raise e @staticmethod - async def list_testing_unique_dataset_ids(req: ListTestingRequest) -> tuple[int, List[uuid.UUID]]: - """根据知识库ID查询测试""" + async def list_testing_unique_datasets(req: ListTestingRequest) -> tuple[int, List[DataSetEntity]]: try: async with await DataBase.get_session() as session: - subq = (select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( - partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time)).label('rn')).subquery()) + subq = ( + select(TaskEntity.op_id, TaskEntity.status, func.row_number().over( + partition_by=TaskEntity.op_id, order_by=desc(TaskEntity.created_time) + ).label('rn')) + .select_from(TaskEntity) + .subquery() + ) - # 构建内层查询,先进行排序 inner_stmt = ( - select(TestingEntity.dataset_id) + select(DataSetEntity.id) + .select_from(TestingEntity) .outerjoin(subq, and_(TestingEntity.id == subq.c.op_id, subq.c.rn == 1)) .outerjoin(DataSetEntity, TestingEntity.dataset_id == DataSetEntity.id) ) inner_stmt = inner_stmt.where(DataSetEntity.status != DataSetStatus.DELETED.value) inner_stmt = inner_stmt.where(TestingEntity.status != TestingStatus.DELETED.value) + if req.kb_id: - inner_stmt = inner_stmt.where(TestingEntity.kb_id == req.kb_id) # 修正此处的条件 + inner_stmt = inner_stmt.where(TestingEntity.kb_id == req.kb_id) if req.testing_id: inner_stmt = inner_stmt.where(TestingEntity.id == req.testing_id) if req.testing_name: @@ -90,26 +95,29 @@ class TestingManager(): if req.author_name: inner_stmt = inner_stmt.where(TestingEntity.author_name.ilike(f"%{req.author_name}%")) - # 按期望的顺序排序 - inner_stmt = inner_stmt.order_by(desc(TestingEntity.created_at)) - inner_stmt = inner_stmt.order_by(asc(TestingEntity.id)) + inner_stmt = inner_stmt.order_by(desc(DataSetEntity.created_at), asc(DataSetEntity.id)) - # 将内层查询作为子查询,然后在外层查询中应用DISTINCT distinct_subq = 
inner_stmt.subquery() - stmt = select(distinct_subq.c.dataset_id).distinct() + stmt = select(distinct_subq.c.id).distinct() - # 计算总数 count_stmt = select(func.count()).select_from(inner_stmt.subquery()) total = (await session.execute(count_stmt)).scalar() - # 应用分页 stmt = stmt.offset((req.page - 1) * req.page_size).limit(req.page_size) - result = await session.execute(stmt) dataset_ids = result.scalars().all() - return total, dataset_ids + + if dataset_ids: + dataset_entities = await session.execute( + select(DataSetEntity).where(DataSetEntity.id.in_(dataset_ids)) + ) + dataset_entities = dataset_entities.scalars().all() + else: + dataset_entities = [] + + return total, dataset_entities except Exception as e: - err = "查询测试关联的dataset_id失败" + err = "查询测试关联的数据集失败" logging.exception("[TestingManager] %s", err) raise e diff --git a/data_chain/parser/tools/token_tool.py b/data_chain/parser/tools/token_tool.py index c7546fd..348feb5 100644 --- a/data_chain/parser/tools/token_tool.py +++ b/data_chain/parser/tools/token_tool.py @@ -295,6 +295,7 @@ class TokenTool: except Exception as e: err = f"[TokenTool] 计算recall失败 {e}" logging.exception("[TokenTool] %s", err) + return -1 @staticmethod async def cal_precision(question: str, content: str, llm: LLM) -> float: @@ -330,6 +331,7 @@ class TokenTool: except Exception as e: err = f"[TokenTool] 计算precision失败 {e}" logging.exception("[TokenTool] %s", err) + return -1 @staticmethod async def cal_faithfulness(question: str, answer: str, content: str, llm: LLM) -> float: @@ -369,6 +371,7 @@ class TokenTool: except Exception as e: err = f"[TokenTool] 计算faithfulness失败 {e}" logging.exception("[TokenTool] %s", err) + return -1 @staticmethod def cosine_distance_numpy(vector1, vector2): @@ -411,6 +414,7 @@ class TokenTool: except Exception as e: err = f"[TokenTool] 计算relevance失败 {e}" logging.exception("[TokenTool] %s", err) + return -1 @staticmethod def cal_lcs(str1: str, str2: str) -> float: diff --git a/data_chain/rag/base_searcher.py 
b/data_chain/rag/base_searcher.py index 284ee34..7a33f3d 100644 --- a/data_chain/rag/base_searcher.py +++ b/data_chain/rag/base_searcher.py @@ -65,7 +65,7 @@ class BaseSearcher: :param query: 查询 :return: 相关上下文 """ - chunk_entities = await ChunkManager.fetch_surrounding_chunk_by_doc_id_and_global_offset(chunk_entity.doc_id, chunk_entity.global_offset, banned_ids) + chunk_entities = await ChunkManager.fetch_surrounding_chunk_by_doc_id_and_global_offset(chunk_entity.doc_id, chunk_entity.global_offset, 50, banned_ids) chunk_entity_dict = {} min_offset = chunk_entity.global_offset max_offset = chunk_entity.global_offset @@ -151,5 +151,8 @@ class BaseSearcher: doc_chunk_dict[chunk_entity.doc_id] = DocChunk( doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[]) chunk = await Convertor.convert_chunk_entity_to_chunk(chunk_entity) + if chunk_entity.doc_id not in doc_chunk_dict: + doc_chunk_dict[chunk_entity.doc_id] = DocChunk( + doc_id=chunk_entity.doc_id, doc_name=chunk_entity.doc_name, chunks=[]) doc_chunk_dict[chunk_entity.doc_id].chunks.append(chunk) return list(doc_chunk_dict.values()) -- Gitee From 74f61baa33838c1576c573e26a3843644576e4d5 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 21:38:57 +0800 Subject: [PATCH 14/15] =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E5=8A=A0=E5=85=A5finis?= =?UTF-8?q?hed=20time?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_chain/apps/base/convertor.py | 3 +++ data_chain/entities/response_data.py | 1 + 2 files changed, 4 insertions(+) diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 703a15a..69aeead 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -358,8 +358,10 @@ class Convertor: """将任务实体和任务报告实体转换为任务""" try: task_completed = 0 + finished_time = None if task_report is not None: task_completed = task_report.current_stage/task_report.stage_cnt*100 + finished_time = 
task_report.created_time.strftime('%Y-%m-%d %H:%M') task = Task( opId=task_entity.op_id, opName=task_entity.op_name, @@ -367,6 +369,7 @@ class Convertor: taskStatus=TaskStatus(task_entity.status), taskType=TaskType(task_entity.type), taskCompleted=task_completed, + finishedTime=finished_time, createdTime=task_entity.created_time.strftime('%Y-%m-%d %H:%M') ) return task diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 43c7257..37fdbbf 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -195,6 +195,7 @@ class Task(BaseModel): task_status: TaskStatus = Field(description="任务状态", alias="taskStatus") task_type: TaskType = Field(description="任务类型", alias="taskType") task_completed: float = Field(description="任务完成度", alias="taskCompleted") + finished_time: Optional[str] = Field(default=None, description="任务完成时间", alias="finishedTime") created_time: str = Field(description="任务创建时间", alias="createdTime") -- Gitee From 1c0132da757f8b5e3e1ee1bb1b46e7b5578353d4 Mon Sep 17 00:00:00 2001 From: zxstty Date: Thu, 15 May 2025 21:39:51 +0800 Subject: [PATCH 15/15] =?UTF-8?q?=E5=90=88=E5=85=A5chat2db?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat2db/app/__init__.py | 0 chat2db/app/app.py | 36 -- chat2db/app/base/ac_automation.py | 87 ---- chat2db/app/base/meta_databbase.py | 18 - chat2db/app/base/mysql.py | 217 -------- chat2db/app/base/postgres.py | 236 --------- chat2db/app/base/vectorize.py | 47 -- chat2db/app/router/database.py | 191 ------- chat2db/app/router/sql_example.py | 137 ----- chat2db/app/router/sql_generate.py | 124 ----- chat2db/app/router/table.py | 147 ------ chat2db/app/service/diff_database_service.py | 28 - chat2db/app/service/keyword_service.py | 131 ----- chat2db/app/service/sql_generate_service.py | 363 ------------- chat2db/common/.env.example | 27 - chat2db/common/init_sql_example.py | 107 ---- 
chat2db/common/table_name.yaml | 10 - chat2db/common/table_name_sql_exmple.yaml | 490 ------------------ chat2db/config/config.py | 55 -- chat2db/database/postgres.py | 126 ----- ...46\347\273\206\350\257\264\346\230\216.md" | 391 -------------- chat2db/llm/chat_with_model.py | 25 - chat2db/manager/column_info_manager.py | 69 --- chat2db/manager/database_info_manager.py | 98 ---- chat2db/manager/sql_example_manager.py | 76 --- chat2db/manager/table_info_manager.py | 87 ---- chat2db/model/request.py | 89 ---- chat2db/model/response.py | 6 - chat2db/scripts/chat2db_config/config.yaml | 2 - chat2db/scripts/docs/output_examples.xlsx | Bin 6080 -> 0 bytes chat2db/scripts/run_chat2db.py | 436 ---------------- chat2db/security/security.py | 116 ----- chat2db/templetes/change_txt_to_yaml.py | 92 ---- chat2db/templetes/prompt.yaml | 115 ---- data_chain/apps/base/convertor.py | 3 - data_chain/entities/request_data.py | 2 +- data_chain/entities/response_data.py | 1 - 37 files changed, 1 insertion(+), 4184 deletions(-) delete mode 100644 chat2db/app/__init__.py delete mode 100644 chat2db/app/app.py delete mode 100644 chat2db/app/base/ac_automation.py delete mode 100644 chat2db/app/base/meta_databbase.py delete mode 100644 chat2db/app/base/mysql.py delete mode 100644 chat2db/app/base/postgres.py delete mode 100644 chat2db/app/base/vectorize.py delete mode 100644 chat2db/app/router/database.py delete mode 100644 chat2db/app/router/sql_example.py delete mode 100644 chat2db/app/router/sql_generate.py delete mode 100644 chat2db/app/router/table.py delete mode 100644 chat2db/app/service/diff_database_service.py delete mode 100644 chat2db/app/service/keyword_service.py delete mode 100644 chat2db/app/service/sql_generate_service.py delete mode 100644 chat2db/common/.env.example delete mode 100644 chat2db/common/init_sql_example.py delete mode 100644 chat2db/common/table_name.yaml delete mode 100644 chat2db/common/table_name_sql_exmple.yaml delete mode 100644 chat2db/config/config.py 
delete mode 100644 chat2db/database/postgres.py delete mode 100644 "chat2db/docs/chat2db\345\267\245\345\205\267\350\257\246\347\273\206\350\257\264\346\230\216.md" delete mode 100644 chat2db/llm/chat_with_model.py delete mode 100644 chat2db/manager/column_info_manager.py delete mode 100644 chat2db/manager/database_info_manager.py delete mode 100644 chat2db/manager/sql_example_manager.py delete mode 100644 chat2db/manager/table_info_manager.py delete mode 100644 chat2db/model/request.py delete mode 100644 chat2db/model/response.py delete mode 100644 chat2db/scripts/chat2db_config/config.yaml delete mode 100644 chat2db/scripts/docs/output_examples.xlsx delete mode 100644 chat2db/scripts/run_chat2db.py delete mode 100644 chat2db/security/security.py delete mode 100644 chat2db/templetes/change_txt_to_yaml.py delete mode 100644 chat2db/templetes/prompt.yaml diff --git a/chat2db/app/__init__.py b/chat2db/app/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/chat2db/app/app.py b/chat2db/app/app.py deleted file mode 100644 index 71be6ed..0000000 --- a/chat2db/app/app.py +++ /dev/null @@ -1,36 +0,0 @@ -import uvicorn -from fastapi import FastAPI -import sys -from chat2db.app.router import sql_example -from chat2db.app.router import sql_generate -from chat2db.app.router import database -from chat2db.app.router import table -from chat2db.config.config import config -import logging - - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -app = FastAPI() - -app.include_router(sql_example.router) -app.include_router(sql_generate.router) -app.include_router(database.router) -app.include_router(table.router) - -if __name__ == '__main__': - try: - ssl_enable = config["SSL_ENABLE"] - if ssl_enable: - uvicorn.run(app, host=config["UVICORN_IP"], port=int(config["UVICORN_PORT"]), - proxy_headers=True, forwarded_allow_ips='*', - ssl_certfile=config["SSL_CERTFILE"], - 
ssl_keyfile=config["SSL_KEYFILE"], - ) - else: - uvicorn.run(app, host=config["UVICORN_IP"], port=int(config["UVICORN_PORT"]), - proxy_headers=True, forwarded_allow_ips='*' - ) - except Exception as e: - exit(1) diff --git a/chat2db/app/base/ac_automation.py b/chat2db/app/base/ac_automation.py deleted file mode 100644 index 3012f2b..0000000 --- a/chat2db/app/base/ac_automation.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import copy -import logging -import sys - -class Node: - def __init__(self, dep, pre_id): - self.dep = dep - self.pre_id = pre_id - self.pre_nearest_children_id = {} - self.children_id = {} - self.data_frame = None - - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class DictTree: - def __init__(self): - self.root = 0 - self.node_list = [Node(0, -1)] - - def load_data(self, data_dict): - for key in data_dict: - self.insert_data(key, data_dict[key]) - self.init_pre() - - def insert_data(self, keyword, data_frame): - if not isinstance(keyword,str): - return - if len(keyword) == 0: - return - node_index = self.root - try: - for i in range(len(keyword)): - if keyword[i] not in self.node_list[node_index].children_id.keys(): - self.node_list.append(Node(self.node_list[node_index].dep+1, 0)) - self.node_list[node_index].children_id[keyword[i]] = len(self.node_list)-1 - node_index = self.node_list[node_index].children_id[keyword[i]] - except Exception as e: - logging.error(f'关键字插入失败由于:{e}') - return - self.node_list[node_index].data_frame = data_frame - - def init_pre(self): - q = [self.root] - l = 0 - r = 1 - try: - while l < r: - node_index = q[l] - self.node_list[node_index].pre_nearest_children_id = self.node_list[self.node_list[node_index].pre_id].children_id.copy() - l += 1 - for key, val in self.node_list[node_index].children_id.items(): - q.append(val) - r += 1 - if key in 
self.node_list[node_index].pre_nearest_children_id.keys(): - pre_id = self.node_list[node_index].pre_nearest_children_id[key] - self.node_list[val].pre_id = pre_id - self.node_list[node_index].pre_nearest_children_id[key] = val - except Exception as e: - logging.error(f'字典树前缀构建失败由于:{e}') - return - - def get_results(self, content: str): - content = content.lower() - pre_node_index = self.root - nex_node_index = None - results = [] - logging.info(f'当前问题{content}') - try: - for i in range(len(content)): - if content[i] in self.node_list[pre_node_index].pre_nearest_children_id.keys(): - nex_node_index = self.node_list[pre_node_index].pre_nearest_children_id[content[i]] - else: - nex_node_index = 0 - if self.node_list[pre_node_index].dep >= self.node_list[nex_node_index].dep: - if self.node_list[pre_node_index].data_frame is not None: - results.extend(copy.deepcopy(self.node_list[pre_node_index].data_frame)) - pre_node_index = nex_node_index - logging.info(f'当前深度{self.node_list[pre_node_index].dep}') - if self.node_list[pre_node_index].data_frame is not None: - results.extend(copy.deepcopy(self.node_list[pre_node_index].data_frame)) - except Exception as e: - logging.error(f'结果获取失败由于:{e}') - return results diff --git a/chat2db/app/base/meta_databbase.py b/chat2db/app/base/meta_databbase.py deleted file mode 100644 index b21b1f1..0000000 --- a/chat2db/app/base/meta_databbase.py +++ /dev/null @@ -1,18 +0,0 @@ -import sys -import logging -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class MetaDatabase: - @staticmethod - def result_to_json(results): - """ - 将 SQL 查询结果解析为 JSON 格式的数据结构,支持多种数据类型 - """ - try: - results = [result._asdict() for result in results] - return results - except Exception as e: - logging.error(f"数据库查询结果解析失败由于: {e}") - raise e diff --git a/chat2db/app/base/mysql.py b/chat2db/app/base/mysql.py deleted file mode 100644 index b47322b..0000000 --- 
a/chat2db/app/base/mysql.py +++ /dev/null @@ -1,217 +0,0 @@ - -import asyncio -import aiomysql -import concurrent.futures -import logging -from sqlalchemy.orm import sessionmaker -from sqlalchemy import create_engine, text -import sys -from concurrent.futures import ThreadPoolExecutor -from urllib.parse import urlparse -from chat2db.app.base.meta_databbase import MetaDatabase -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class Mysql(MetaDatabase): - executor = ThreadPoolExecutor(max_workers=10) - - async def test_database_connection(database_url): - try: - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(Mysql._connect_and_query, database_url) - result = future.result(timeout=5) - return result - except concurrent.futures.TimeoutError: - logging.error('mysql数据库连接超时') - return False - except Exception as e: - logging.error(f'mysql数据库连接失败由于{e}') - return False - - @staticmethod - def _connect_and_query(database_url): - try: - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - session = sessionmaker(bind=engine)() - session.execute(text("SELECT 1")) - session.close() - return True - except Exception as e: - raise e - - @staticmethod - async def drop_table(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with sessionmaker(engine)() as session: - sql_str = f"DROP TABLE IF EXISTS {table_name};" - session.execute(text(sql_str)) - - @staticmethod - async def select_primary_key_and_keyword_from_table(database_url, table_name, keyword): - try: - url = urlparse(database_url) - db_config = { - 'host': url.hostname or 'localhost', - 'port': int(url.port or 3306), - 'user': url.username or 'root', - 'password': url.password or '', - 'db': url.path.strip('/') 
- } - - async with aiomysql.create_pool(**db_config) as pool: - async with pool.acquire() as conn: - async with conn.cursor() as cur: - primary_key_query = """ - SELECT - COLUMNS.column_name - FROM - information_schema.tables AS TABLES - INNER JOIN information_schema.columns AS COLUMNS ON TABLES.table_name = COLUMNS.table_name - WHERE - TABLES.table_schema = %s AND TABLES.table_name = %s AND COLUMNS.column_key = 'PRI'; - """ - - # 尝试执行查询 - await cur.execute(primary_key_query, (db_config['db'], table_name)) - primary_key_list = await cur.fetchall() - if not primary_key_list: - return [] - primary_key_names = ', '.join([record[0] for record in primary_key_list]) - columns = f'{primary_key_names}, {keyword}' - query = f'SELECT {columns} FROM {table_name};' - await cur.execute(query) - results = await cur.fetchall() - - def _process_results(results, primary_key_list): - tmp_dict = {} - for row in results: - key = str(row[-1]) - if key not in tmp_dict: - tmp_dict[key] = [] - pk_values = [str(row[i]) for i in range(len(primary_key_list))] - tmp_dict[key].append(pk_values) - - return { - 'primary_key_list': [record[0] for record in primary_key_list], - 'keyword_value_dict': tmp_dict - } - result = await asyncio.get_event_loop().run_in_executor( - Mysql.executor, - _process_results, - results, - primary_key_list - ) - return result - - except Exception as e: - logging.error(f'mysql数据检索失败由于 {e}') - - @staticmethod - async def assemble_sql_query_base_on_primary_key(table_name, primary_key_list, primary_key_value_list): - sql_str = f'SELECT * FROM {table_name} where ' - for i in range(len(primary_key_list)): - sql_str += primary_key_list[i]+'= \''+primary_key_value_list[i]+'\'' - if i != len(primary_key_list)-1: - sql_str += ' and ' - sql_str += ';' - return sql_str - - @staticmethod - async def get_table_info(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with 
sessionmaker(engine)() as session: - sql_str = f"""SELECT TABLE_COMMENT FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{table_name}';""" - table_note = session.execute(text(sql_str)).one()[0] - if table_note == '': - table_note = table_name - table_note = { - 'table_name': table_name, - 'table_note': table_note - } - return table_note - - @staticmethod - async def get_column_info(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with engine.connect() as conn: - sql_str = f""" - SELECT column_name, column_type, column_comment FROM information_schema.columns where TABLE_NAME='{table_name}'; - """ - results = conn.execute(text(sql_str), {'table_name': table_name}).all() - column_info_list = [] - for result in results: - column_info_list.append({'column_name': result[0], 'column_type': result[1], 'column_note': result[2]}) - return column_info_list - - @staticmethod - async def get_all_table_name_from_database_url(database_url): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with engine.connect() as connection: - result = connection.execute(text("SHOW TABLES")) - table_name_list = [row[0] for row in result] - return table_name_list - - @staticmethod - async def get_rand_data(database_url, table_name, cnt=10): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - try: - with sessionmaker(engine)() as session: - sql_str = f'''SELECT * - FROM {table_name} - ORDER BY RAND() - LIMIT {cnt};''' - dataframe = str(session.execute(text(sql_str)).all()) - except Exception as e: - dataframe = '' - logging.error(f'随机从数据库中获取数据失败由于{e}') - return dataframe - - @staticmethod - async def try_excute(database_url, sql_str): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - 
) - with sessionmaker(engine)() as session: - result = session.execute(text(sql_str)).all() - return Mysql.result_to_json(result) diff --git a/chat2db/app/base/postgres.py b/chat2db/app/base/postgres.py deleted file mode 100644 index a29a442..0000000 --- a/chat2db/app/base/postgres.py +++ /dev/null @@ -1,236 +0,0 @@ -import asyncio -import asyncpg -import concurrent.futures -import logging -from sqlalchemy.orm import sessionmaker -from sqlalchemy import create_engine, text -import sys -from concurrent.futures import ThreadPoolExecutor -from chat2db.app.base.meta_databbase import MetaDatabase -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -def handler(signum, frame): - raise TimeoutError("超时") - - -class Postgres(MetaDatabase): - executor = ThreadPoolExecutor(max_workers=10) - - async def test_database_connection(database_url): - try: - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(Postgres._connect_and_query, database_url) - result = future.result(timeout=5) - return result - except concurrent.futures.TimeoutError: - logging.error('postgres数据库连接超时') - return False - except Exception as e: - logging.error(f'postgres数据库连接失败由于{e}') - return False - - @staticmethod - def _connect_and_query(database_url): - try: - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - session = sessionmaker(bind=engine)() - session.execute(text("SELECT 1")) - session.close() - return True - except Exception as e: - raise e - - @staticmethod - async def drop_table(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with sessionmaker(engine)() as session: - sql_str = f"DROP TABLE IF EXISTS {table_name};" - session.execute(text(sql_str)) - - @staticmethod - async def 
select_primary_key_and_keyword_from_table(database_url, table_name, keyword): - try: - dsn = database_url.replace('+psycopg2', '') - conn = await asyncpg.connect(dsn=dsn) - primary_key_query = """ - SELECT - kcu.column_name - FROM - information_schema.table_constraints AS tc - JOIN information_schema.key_column_usage AS kcu - ON tc.constraint_name = kcu.constraint_name - WHERE - tc.constraint_type = 'PRIMARY KEY' - AND tc.table_name = $1; - """ - primary_key_list = await conn.fetch(primary_key_query, table_name) - if not primary_key_list: - return [] - columns = ', '.join([record['column_name'] for record in primary_key_list]) + f', {keyword}' - query = f'SELECT {columns} FROM {table_name};' - results = await conn.fetch(query) - - def _process_results(results, primary_key_list): - tmp_dict = {} - for row in results: - key = str(row[-1]) - if key not in tmp_dict: - tmp_dict[key] = [] - pk_values = [str(row[i]) for i in range(len(primary_key_list))] - tmp_dict[key].append(pk_values) - - return { - 'primary_key_list': [record['column_name'] for record in primary_key_list], - 'keyword_value_dict': tmp_dict - } - result = await asyncio.get_event_loop().run_in_executor( - Postgres.executor, - _process_results, - results, - primary_key_list - ) - await conn.close() - - return result - except Exception as e: - logging.error(f'postgres数据检索失败由于 {e}') - return None - - @staticmethod - async def assemble_sql_query_base_on_primary_key(table_name, primary_key_list, primary_key_value_list): - sql_str = f'SELECT * FROM {table_name} where ' - for i in range(len(primary_key_list)): - sql_str += primary_key_list[i]+'='+'\''+primary_key_value_list[i]+'\'' - if i != len(primary_key_list)-1: - sql_str += ' and ' - sql_str += ';' - return sql_str - - @staticmethod - async def get_table_info(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with engine.connect() as conn: - sql_str = """ - 
SELECT - d.description AS table_description - FROM - pg_class t - JOIN - pg_description d ON t.oid = d.objoid - WHERE - t.relkind = 'r' AND - d.objsubid = 0 AND - t.relname = :table_name; """ - result = conn.execute(text(sql_str), {'table_name': table_name}).one_or_none() - if result is None: - table_note = table_name - else: - table_note = result[0] - table_note = { - 'table_name': table_name, - 'table_note': table_note - } - return table_note - - @staticmethod - async def get_column_info(database_url, table_name): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with engine.connect() as conn: - sql_str = """ - SELECT - a.attname as 字段名, - format_type(a.atttypid,a.atttypmod) as 类型, - col_description(a.attrelid,a.attnum) as 注释 - FROM - pg_class as c,pg_attribute as a - where - a.attrelid = c.oid - and - a.attnum>0 - and - c.relname = :table_name; - """ - results = conn.execute(text(sql_str), {'table_name': table_name}).all() - column_info_list = [] - for result in results: - column_info_list.append({'column_name': result[0], 'column_type': result[1], 'column_note': result[2]}) - return column_info_list - - @staticmethod - async def get_all_table_name_from_database_url(database_url): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with engine.connect() as connection: - sql_str = ''' - SELECT table_name - FROM information_schema.tables - WHERE table_schema = 'public'; - ''' - result = connection.execute(text(sql_str)) - table_name_list = [row[0] for row in result] - return table_name_list - - @staticmethod - async def get_rand_data(database_url, table_name, cnt=10): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - try: - with sessionmaker(engine)() as session: - sql_str = f'''SELECT * - FROM {table_name} - ORDER BY RANDOM() - LIMIT {cnt};''' - 
dataframe = str(session.execute(text(sql_str)).all()) - except Exception as e: - dataframe = '' - logging.error(f'随机从数据库中获取数据失败由于{e}') - return dataframe - - @staticmethod - async def try_excute(database_url, sql_str): - engine = create_engine( - database_url, - pool_size=20, - max_overflow=80, - pool_recycle=300, - pool_pre_ping=True - ) - with sessionmaker(engine)() as session: - result=session.execute(text(sql_str)).all() - return Postgres.result_to_json(result) diff --git a/chat2db/app/base/vectorize.py b/chat2db/app/base/vectorize.py deleted file mode 100644 index 5362047..0000000 --- a/chat2db/app/base/vectorize.py +++ /dev/null @@ -1,47 +0,0 @@ -import requests -import urllib3 -from chat2db.config.config import config -import json -import sys -import logging - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - - -class Vectorize(): - @staticmethod - async def vectorize_embedding(text): - if config['EMBEDDING_TYPE']=='openai': - headers = { - "Authorization": f"Bearer {config['EMBEDDING_API_KEY']}" - } - data = { - "input": text, - "model": config["EMBEDDING_MODEL_NAME"], - "encoding_format": "float" - } - try: - res = requests.post(url=config["EMBEDDING_ENDPOINT"],headers=headers, json=data, verify=False) - if res.status_code != 200: - return None - return res.json()['data'][0]['embedding'] - except Exception as e: - logging.error(f"Embedding error failed due to: {e}") - return None - elif config['EMBEDDING_TYPE'] =='mindie': - try: - data = { - "inputs": text, - } - res = requests.post(url=config["EMBEDDING_ENDPOINT"], json=data, verify=False) - if res.status_code != 200: - return None - return json.loads(res.text)[0] - except Exception as e: - logging.error(f"Embedding error failed due to: {e}") - return None - else: - return None diff --git a/chat2db/app/router/database.py 
b/chat2db/app/router/database.py deleted file mode 100644 index 37aacca..0000000 --- a/chat2db/app/router/database.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - -import logging -import uuid -from fastapi import APIRouter, status -from typing import Optional -import sys -from chat2db.model.request import DatabaseAddRequest, DatabaseDelRequest, DatabaseSqlGenerateRequest -from chat2db.model.response import ResponseData -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.column_info_manager import ColumnInfoManager -from chat2db.app.service.diff_database_service import DiffDatabaseService -from chat2db.app.service.sql_generate_service import SqlGenerateService -from chat2db.app.service.keyword_service import keyword_service -from chat2db.app.base.vectorize import Vectorize - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -router = APIRouter( - prefix="/database" -) - - -@router.post("/add", response_model=ResponseData) -async def add_database_info(request: DatabaseAddRequest): - database_url = request.database_url - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - if not DiffDatabaseService.is_database_type_allow(database_type): - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="不支持当前数据库", - result={} - ) - flag = await DiffDatabaseService.get_database_service(database_type).test_database_connection(database_url) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="无法连接当前数据库", - result={} - ) - database_id = await DatabaseInfoManager.add_database(database_url) - if database_id is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="数据库连接添加失败,当前存在重复数据库配置", - 
result={'database_id': database_id} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="success", - result={'database_id': database_id} - ) - - -@router.post("/del", response_model=ResponseData) -async def del_database_info(request: DatabaseDelRequest): - database_id = request.database_id - database_url = request.database_url - if database_id: - flag = await DatabaseInfoManager.del_database_by_id(database_id) - else: - flag = await DatabaseInfoManager.del_database_by_url(database_url) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="删除数据库配置失败,数据库配置不存在", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="删除数据库配置成功", - result={} - ) - - -@router.get("/query", response_model=ResponseData) -async def query_database_info(): - database_info_list = await DatabaseInfoManager.get_all_database_info() - return ResponseData( - code=status.HTTP_200_OK, - message="查询数据库配置成功", - result={'database_info_list': database_info_list} - ) - - -@router.get("/list", response_model=ResponseData) -async def list_table_in_database(database_id: uuid.UUID, table_filter: str = ''): - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - if database_url is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="查询数据库内表格配置失败,数据库配置不存在", - result={} - ) - if not DiffDatabaseService.is_database_type_allow(database_type): - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="不支持当前数据库", - result={} - ) - flag = await DiffDatabaseService.get_database_service(database_type).test_database_connection(database_url) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="无法连接当前数据库", - result={} - ) - table_name_list = await DiffDatabaseService.get_database_service(database_type).get_all_table_name_from_database_url(database_url) - 
results = [] - for table_name in table_name_list: - if table_filter in table_name: - results.append(table_name) - return ResponseData( - code=status.HTTP_200_OK, - message="查询数据库配置成功", - result={'table_name_list': results} - ) - - -@router.post("/sql", response_model=ResponseData) -async def generate_sql_from_database(request: DatabaseSqlGenerateRequest): - database_url = request.database_url - table_name_list = request.table_name_list - question = request.question - use_llm_enhancements = request.use_llm_enhancements - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - if not DiffDatabaseService.is_database_type_allow(database_type): - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="不支持当前数据库", - result={} - ) - flag = await DiffDatabaseService.get_database_service(database_type).test_database_connection(database_url) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="无法连接当前数据库", - result={} - ) - tmp_table_name_list = await DiffDatabaseService.get_database_service(database_type).get_all_table_name_from_database_url(database_url) - database_id = await DatabaseInfoManager.get_database_id_by_url(database_url) - if database_id is None: - database_id = await DatabaseInfoManager.add_database(database_url) - for table_name in tmp_table_name_list: - try: - tmp_dict = await DiffDatabaseService.get_database_service(database_type).get_table_info(database_url, table_name) - table_note = tmp_dict['table_note'] - table_note_vector = await Vectorize.vectorize_embedding(table_note) - table_id = await TableInfoManager.add_table_info(database_id, table_name, table_note, table_note_vector) - column_info_list = await DiffDatabaseService.get_database_service(database_type).get_column_info(database_url, table_name) - for column_info in column_info_list: - await ColumnInfoManager.add_column_info_with_table_id( - table_id, column_info['column_name'], - column_info['column_type'], - 
column_info['column_note']) - except Exception as e: - import traceback - logging.error(f'{table_name}') - logging.error(f'表格信息获取失败由于:{traceback.format_exc()}') - continue - if table_name_list: - table_id_list = [] - for table_name in table_name_list: - table_id = await TableInfoManager.get_table_id_by_database_id_and_table_name(database_id, table_name) - if table_id is None: - continue - table_id_list.append(table_id) - else: - table_id_list = None - results = {} - sql_list = await SqlGenerateService.generate_sql_base_on_example( - database_id=database_id, question=question, table_id_list=table_id_list, - use_llm_enhancements=use_llm_enhancements) - try: - sql_list += await keyword_service.generate_sql(question, database_id, table_id_list) - results['sql_list'] = sql_list[:request.topk] - results['database_url'] = database_url - except Exception as e: - logging.error(f'sql生成失败由于{e}') - return ResponseData( - code=status.HTTP_400_BAD_REQUEST, - message="sql生成失败", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, message="success", - result=results - ) diff --git a/chat2db/app/router/sql_example.py b/chat2db/app/router/sql_example.py deleted file mode 100644 index 08f9139..0000000 --- a/chat2db/app/router/sql_example.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
- -import logging -import uuid -from fastapi import APIRouter, status -import sys - -from chat2db.model.request import SqlExampleAddRequest, SqlExampleDelRequest, SqlExampleUpdateRequest, SqlExampleGenerateRequest -from chat2db.model.response import ResponseData -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.sql_example_manager import SqlExampleManager -from chat2db.app.service.sql_generate_service import SqlGenerateService -from chat2db.app.base.vectorize import Vectorize -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -router = APIRouter( - prefix="/sql/example" -) - - -@router.post("/add", response_model=ResponseData) -async def add_sql_example(request: SqlExampleAddRequest): - table_id = request.table_id - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - if table_info is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不存在", - result={} - ) - database_id = table_info['database_id'] - question = request.question - question_vector = await Vectorize.vectorize_embedding(question) - sql = request.sql - try: - sql_example_id = await SqlExampleManager.add_sql_example(question, sql, table_id, question_vector) - except Exception as e: - logging.error(f'sql案例添加失败由于{e}') - return ResponseData( - code=status.HTTP_400_BAD_REQUEST, - message="sql案例添加失败", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="success", - result={'sql_example_id': sql_example_id} - ) - - -@router.post("/del", response_model=ResponseData) -async def del_sql_example(request: SqlExampleDelRequest): - sql_example_id = request.sql_example_id - flag = await SqlExampleManager.del_sql_example_by_id(sql_example_id) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - 
message="sql案例不存在", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="sql案例删除成功", - result={} - ) - - -@router.get("/query", response_model=ResponseData) -async def query_sql_example(table_id: uuid.UUID): - sql_example_list = await SqlExampleManager.query_sql_example_by_table_id(table_id) - return ResponseData( - code=status.HTTP_200_OK, - message="查询sql案例成功", - result={'sql_example_list': sql_example_list} - ) - - -@router.post("/update", response_model=ResponseData) -async def update_sql_example(request: SqlExampleUpdateRequest): - sql_example_id = request.sql_example_id - question = request.question - question_vector = await Vectorize.vectorize_embedding(question) - sql = request.sql - flag = await SqlExampleManager.update_sql_example_by_id(sql_example_id, question, sql, question_vector) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="sql案例不存在", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="sql案例更新成功", - result={} - ) - - -@router.post("/generate", response_model=ResponseData) -async def generate_sql_example(request: SqlExampleGenerateRequest): - table_id = request.table_id - generate_cnt = request.generate_cnt - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - if table_info is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不存在", - result={} - ) - table_name = table_info['table_name'] - database_id = table_info['database_id'] - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - sql_var = request.sql_var - sql_example_list = [] - for i in range(generate_cnt): - try: - tmp_dict = await SqlGenerateService.generate_sql_base_on_data(database_url, table_name, sql_var) - except Exception as e: - logging.error(f'sql案例生成失败由于{e}') - continue - if tmp_dict is None: - continue - question = tmp_dict['question'] - question_vector = await Vectorize.vectorize_embedding(question) - 
sql = tmp_dict['sql'] - await SqlExampleManager.add_sql_example(question, sql, table_id, question_vector) - tmp_dict['database_id'] = database_id - tmp_dict['table_id'] = table_id - sql_example_list.append(tmp_dict) - return ResponseData( - code=status.HTTP_200_OK, - message="sql案例生成成功", - result={ - 'sql_example_list': sql_example_list - } - ) diff --git a/chat2db/app/router/sql_generate.py b/chat2db/app/router/sql_generate.py deleted file mode 100644 index 69ff0d2..0000000 --- a/chat2db/app/router/sql_generate.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - -import logging -from fastapi import APIRouter, status -import sys - -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.column_info_manager import ColumnInfoManager -from chat2db.model.request import SqlGenerateRequest, SqlRepairRequest, SqlExcuteRequest -from chat2db.model.response import ResponseData -from chat2db.app.service.sql_generate_service import SqlGenerateService -from chat2db.app.service.keyword_service import keyword_service -from chat2db.app.service.diff_database_service import DiffDatabaseService -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -router = APIRouter( - prefix="/sql" -) - - -@router.post("/generate", response_model=ResponseData) -async def generate_sql(request: SqlGenerateRequest): - database_id = request.database_id - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - table_id_list = request.table_id_list - question = request.question - use_llm_enhancements = request.use_llm_enhancements - results = {} - sql_list = await SqlGenerateService.generate_sql_base_on_example( - database_id=database_id, question=question, table_id_list=table_id_list, - 
use_llm_enhancements=use_llm_enhancements) - try: - sql_list += await keyword_service.generate_sql(question, database_id, table_id_list) - results['sql_list'] = sql_list[:request.topk] - results['database_url'] = database_url - except Exception as e: - logging.error(f'sql生成失败由于{e}') - return ResponseData( - code=status.HTTP_400_BAD_REQUEST, - message="sql生成失败", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, message="success", - result=results - ) - - -@router.post("/repair", response_model=ResponseData) -async def repair_sql(request: SqlRepairRequest): - database_id = request.database_id - table_id = request.table_id - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - if database_url is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="当前数据库配置不存在", - result={} - ) - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - if table_info is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不存在", - result={} - ) - if table_info['database_id'] != database_id: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不属于当前数据库", - result={} - ) - column_info_list = await ColumnInfoManager.get_column_info_by_table_id(table_id) - sql = request.sql - message = request.message - question = request.question - try: - sql = await SqlGenerateService.repair_sql(database_type, table_info, column_info_list, sql, message, question) - except Exception as e: - logging.error(f'sql修复失败由于{e}') - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="sql修复失败", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="sql修复成功", - result={'database_id': database_id, - 'table_id': table_id, - 'sql': sql} - ) - - -@router.post("/execute", response_model=ResponseData) -async def execute_sql(request: SqlExcuteRequest): 
- database_id = request.database_id - sql = request.sql - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - if database_url is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="当前数据库配置不存在", - result={} - ) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - try: - results = await DiffDatabaseService.database_map[database_type].try_excute(database_url, sql) - except Exception as e: - import traceback - logging.error(f'sql执行失败由于{traceback.format_exc()}') - return ResponseData( - code=status.HTTP_500_INTERNAL_SERVER_ERROR, - message="sql执行失败", - result={'Error': str(e)} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="sql执行成功", - result=results - ) diff --git a/chat2db/app/router/table.py b/chat2db/app/router/table.py deleted file mode 100644 index 33ca4f9..0000000 --- a/chat2db/app/router/table.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
- -import logging -import uuid -from fastapi import APIRouter, status -import sys - -from chat2db.model.request import TableAddRequest, TableDelRequest, EnableColumnRequest -from chat2db.model.response import ResponseData -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.column_info_manager import ColumnInfoManager -from chat2db.app.service.diff_database_service import DiffDatabaseService -from chat2db.app.base.vectorize import Vectorize -from chat2db.app.service.keyword_service import keyword_service -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - -router = APIRouter( - prefix="/table" -) - - -@router.post("/add", response_model=ResponseData) -async def add_database_info(request: TableAddRequest): - database_id = request.database_id - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - if database_url is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="当前数据库配置不存在", - result={} - ) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - flag = await DiffDatabaseService.get_database_service(database_type).test_database_connection(database_url) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="无法连接当前数据库", - result={} - ) - table_name = request.table_name - table_name_list = await DiffDatabaseService.get_database_service(database_type).get_all_table_name_from_database_url(database_url) - if table_name not in table_name_list: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不存在", - result={} - ) - tmp_dict = await DiffDatabaseService.get_database_service(database_type).get_table_info(database_url, table_name) - table_note = tmp_dict['table_note'] - table_note_vector = await 
Vectorize.vectorize_embedding(table_note) - table_id = await TableInfoManager.add_table_info(database_id, table_name, table_note, table_note_vector) - if table_id is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格添加失败,当前存在重复表格", - result={} - ) - column_info_list = await DiffDatabaseService.get_database_service(database_type).get_column_info(database_url, table_name) - for column_info in column_info_list: - await ColumnInfoManager.add_column_info_with_table_id( - table_id, column_info['column_name'], - column_info['column_type'], - column_info['column_note']) - return ResponseData( - code=status.HTTP_200_OK, - message="success", - result={'table_id': table_id} - ) - - -@router.post("/del", response_model=ResponseData) -async def del_table_info(request: TableDelRequest): - table_id = request.table_id - flag = await TableInfoManager.del_table_by_id(table_id) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="表格不存在", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="删除表格成功", - result={} - ) - - -@router.get("/query", response_model=ResponseData) -async def query_table_info(database_id: uuid.UUID): - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - if database_url is None: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="当前数据库配置不存在", - result={} - ) - table_info_list = await TableInfoManager.get_table_info_by_database_id(database_id) - return ResponseData( - code=status.HTTP_200_OK, - message="查询表格成功", - result={'table_info_list': table_info_list} - ) - - -@router.get("/column/query", response_model=ResponseData) -async def query_column(table_id: uuid.UUID): - column_info_list = await ColumnInfoManager.get_column_info_by_table_id(table_id) - return ResponseData( - code=status.HTTP_200_OK, - message="", - result={'column_info_list': column_info_list} - ) - - -@router.post("/column/enable", 
response_model=ResponseData) -async def enable_column(request: EnableColumnRequest): - column_id = request.column_id - enable = request.enable - flag = await ColumnInfoManager.update_column_info_enable(column_id, enable) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="列不存在", - result={} - ) - column_info = await ColumnInfoManager.get_column_info_by_column_id(column_id) - column_name = column_info['column_name'] - table_id = column_info['table_id'] - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - database_id = table_info['database_id'] - if enable: - flag = await keyword_service.add(database_id, table_id, column_name) - else: - flag = await keyword_service.del_by_column_name(database_id, table_id, column_name) - if not flag: - return ResponseData( - code=status.HTTP_422_UNPROCESSABLE_ENTITY, - message="列关键字功能开启/关闭失败", - result={} - ) - return ResponseData( - code=status.HTTP_200_OK, - message="列关键字功能开启/关闭成功", - result={} - ) diff --git a/chat2db/app/service/diff_database_service.py b/chat2db/app/service/diff_database_service.py deleted file mode 100644 index bb9f979..0000000 --- a/chat2db/app/service/diff_database_service.py +++ /dev/null @@ -1,28 +0,0 @@ -import re -from urllib.parse import urlparse -from chat2db.app.base.mysql import Mysql -from chat2db.app.base.postgres import Postgres - - -class DiffDatabaseService(): - database_types = ["mysql", "postgresql", "opengauss"] - database_map = {"mysql": Mysql, "postgresql": Postgres, "opengauss": Postgres} - - @staticmethod - def get_database_service(database_type): - if database_type not in DiffDatabaseService.database_types: - raise f"不支持当前数据库类型{database_type}" - return DiffDatabaseService.database_map[database_type] - - @staticmethod - def get_database_type_from_url(database_url): - result = urlparse(database_url) - try: - database_type = result.scheme.split('+')[0] - except Exception as e: - raise e - return database_type.lower() - - 
@staticmethod - def is_database_type_allow(database_type): - return database_type in DiffDatabaseService.database_types diff --git a/chat2db/app/service/keyword_service.py b/chat2db/app/service/keyword_service.py deleted file mode 100644 index 685c341..0000000 --- a/chat2db/app/service/keyword_service.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import asyncio -import copy -import uuid -import sys -import threading -from concurrent.futures import ThreadPoolExecutor -from chat2db.app.service.diff_database_service import DiffDatabaseService -from chat2db.app.base.ac_automation import DictTree -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.column_info_manager import ColumnInfoManager -import logging - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class KeywordManager(): - def __init__(self): - self.keyword_asset_dict = {} - self.lock = threading.Lock() - self.data_frame_dict = {} - - async def load_keywords(self): - database_info_list = await DatabaseInfoManager.get_all_database_info() - for database_info in database_info_list: - database_id = database_info['database_id'] - table_info_list = await TableInfoManager.get_table_info_by_database_id(database_id) - cnt=0 - for table_info in table_info_list: - table_id = table_info['table_id'] - column_info_list = await ColumnInfoManager.get_column_info_by_table_id(table_id, True) - for i in range(len(column_info_list)): - column_info = column_info_list[i] - cnt+=1 - try: - column_name = column_info['column_name'] - await self.add(database_id, table_id, column_name) - except Exception as e: - logging.error('关键字数据结构生成失败') - def add_excutor(self, rd_id, database_id, table_id, table_info, column_info_list, column_name): - tmp_dict = 
self.data_frame_dict[rd_id] - tmp_dict_tree = DictTree() - tmp_dict_tree.load_data(tmp_dict['keyword_value_dict']) - if database_id not in self.keyword_asset_dict.keys(): - self.keyword_asset_dict[database_id] = {} - with self.lock: - if table_id not in self.keyword_asset_dict[database_id].keys(): - self.keyword_asset_dict[database_id][table_id] = {} - self.keyword_asset_dict[database_id][table_id]['table_info'] = table_info - self.keyword_asset_dict[database_id][table_id]['column_info_list'] = column_info_list - self.keyword_asset_dict[database_id][table_id]['primary_key_list'] = copy.deepcopy( - tmp_dict['primary_key_list']) - self.keyword_asset_dict[database_id][table_id]['dict_tree_dict'] = {} - self.keyword_asset_dict[database_id][table_id]['dict_tree_dict'][column_name] = tmp_dict_tree - del self.data_frame_dict[rd_id] - - async def add(self, database_id, table_id, column_name): - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - table_name = table_info['table_name'] - tmp_dict = await DiffDatabaseService.get_database_service( - database_type).select_primary_key_and_keyword_from_table(database_url, table_name, column_name) - if tmp_dict is None: - return - rd_id = str(uuid.uuid4) - self.data_frame_dict[rd_id] = tmp_dict - del database_url - column_info_list = await ColumnInfoManager.get_column_info_by_table_id(table_id) - try: - thread = threading.Thread(target=self.add_excutor, args=(rd_id, database_id, table_id, - table_info, column_info_list, column_name,)) - thread.start() - except Exception as e: - logging.error(f'创建增加线程失败由于{e}') - return False - return True - - async def update_keyword_asset(self): - database_info_list = DatabaseInfoManager.get_all_database_info() - for database_info in database_info_list: - database_id = database_info['database_id'] - table_info_list = 
TableInfoManager.get_table_info_by_database_id(database_id) - for table_info in table_info_list: - table_id = table_info['table_id'] - column_info_list = ColumnInfoManager.get_column_info_by_table_id(table_id, True) - for column_info in column_info_list: - await self.add(database_id, table_id, column_info['column_name']) - - async def del_by_column_name(self, database_id, table_id, column_name): - try: - with self.lock: - if database_id in self.keyword_asset_dict.keys(): - if table_id in self.keyword_asset_dict[database_id].keys(): - if column_name in self.keyword_asset_dict[database_id][table_id]['dict_tree_dict'].keys(): - del self.keyword_asset_dict[database_id][table_id]['dict_tree_dict'][column_name] - except Exception as e: - logging.error(f'字典树删除失败由于{e}') - return False - return True - - async def generate_sql(self, question, database_id, table_id_list=None): - with self.lock: - results = [] - if database_id in self.keyword_asset_dict.keys(): - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - for table_id in self.keyword_asset_dict[database_id].keys(): - if table_id_list is None or table_id in table_id_list: - table_info = self.keyword_asset_dict[database_id][table_id]['table_info'] - primary_key_list = self.keyword_asset_dict[database_id][table_id]['primary_key_list'] - primary_key_value_list = [] - try: - for dict_tree in self.keyword_asset_dict[database_id][table_id]['dict_tree_dict'].values(): - primary_key_value_list += dict_tree.get_results(question) - except Exception as e: - logging.error(f'从字典树中获取结果失败由于{e}') - continue - for i in range(len(primary_key_value_list)): - sql_str = await DiffDatabaseService.get_database_service(database_type).assemble_sql_query_base_on_primary_key( - table_info['table_name'], primary_key_list, primary_key_value_list[i]) - tmp_dict = {'database_id': database_id, 'table_id': table_id, 'sql': sql_str} - 
results.append(tmp_dict) - del database_url - return results - - -keyword_service = KeywordManager() -asyncio.run(keyword_service.load_keywords()) diff --git a/chat2db/app/service/sql_generate_service.py b/chat2db/app/service/sql_generate_service.py deleted file mode 100644 index f20f977..0000000 --- a/chat2db/app/service/sql_generate_service.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import asyncio -import yaml -import re -import json -import random -import sys -import uuid -import logging -from pandas.core.api import DataFrame as DataFrame - -from chat2db.manager.database_info_manager import DatabaseInfoManager -from chat2db.manager.table_info_manager import TableInfoManager -from chat2db.manager.column_info_manager import ColumnInfoManager -from chat2db.manager.sql_example_manager import SqlExampleManager -from chat2db.app.service.diff_database_service import DiffDatabaseService -from chat2db.llm.chat_with_model import LLM -from chat2db.config.config import config -from chat2db.app.base.vectorize import Vectorize - - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class SqlGenerateService(): - - @staticmethod - async def merge_table_and_column_info(table_info, column_info_list): - table_name = table_info.get('table_name', '') - table_note = table_info.get('table_note', '') - note = '\n' - note += '\n'+'\n'+'\n' - note += '\n'+f'\n'+'\n' - note += '\n'+'\n'+'\n' - note += '\n'+f'\n'+'\n' - note += '\n'+' \n\n\n'+'\n' - for column_info in column_info_list: - column_name = column_info.get('column_name', '') - column_type = column_info.get('column_type', '') - column_note = column_info.get('column_note', '') - note += '\n'+f' \n\n\n'+'\n' - note += '
表名
{table_name}
表的注释
{table_note}
字段字段类型字段注释
{column_name}{column_type}{column_note}
' - return note - - @staticmethod - def extract_list_statements(list_string): - pattern = r'\[.*?\]' - matches = re.findall(pattern, list_string) - if len(matches) == 0: - return '' - tmp = matches[0] - tmp = tmp.replace('\'', '\"') - tmp = tmp.replace(',', ',') - return tmp - - @staticmethod - async def get_most_similar_table_id_list(database_id, question, table_choose_cnt): - table_info_list = await TableInfoManager.get_table_info_by_database_id(database_id) - random.shuffle(table_info_list) - table_id_set = set() - for table_info in table_info_list: - table_id = table_info['table_id'] - table_id_set.add(str(table_id)) - try: - with open('./chat2db/templetes/prompt.yaml', 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - prompt = prompt_dict.get('table_choose_prompt', '') - table_entries = '\n' - table_entries += '\n'+' \n\n'+'\n' - token_upper = 2048 - for table_info in table_info_list: - table_id = table_info['table_id'] - table_note = table_info['table_note'] - if len(table_entries) + len( - '\n' + f' \n\n' + '\n') > token_upper: - break - table_entries += '\n'+f' \n\n'+'\n' - table_entries += '
主键表注释
{table_id}{table_note}
{table_id}{table_note}
' - prompt = prompt.format(table_cnt=table_choose_cnt, table_entries=table_entries, question=question) - # logging.info(f'在大模型增强模式下,选择表的prompt构造成功:{prompt}') - except Exception as e: - logging.error(f'在大模型增强模式下,选择表的prompt构造失败由于:{e}') - return [] - try: - llm = LLM(model_name=config['LLM_MODEL'], - openai_api_base=config['LLM_URL'], - openai_api_key=config['LLM_KEY'], - max_tokens=config['LLM_MAX_TOKENS'], - request_timeout=60, - temperature=0.5) - except Exception as e: - llm = None - logging.error(f'在大模型增强模式下,选择表的过程中,与大模型建立连接失败由于:{e}') - table_id_list = [] - if llm is not None: - for i in range(2): - content = await llm.chat_with_model(prompt, '请输包含选择表主键的列表') - try: - sub_table_id_list = json.loads(SqlGenerateService.extract_list_statements(content)) - except: - sub_table_id_list = [] - for j in range(len(sub_table_id_list)): - if sub_table_id_list[j] in table_id_set and uuid.UUID(sub_table_id_list[j]) not in table_id_list: - table_id_list.append(uuid.UUID(sub_table_id_list[j])) - if len(table_id_list) < table_choose_cnt: - table_choose_cnt -= len(table_id_list) - for i in range(min(table_choose_cnt, len(table_info_list))): - table_id = table_info_list[i]['table_id'] - if table_id is not None and table_id not in table_id_list: - table_id_list.append(table_id) - return table_id_list - - @staticmethod - async def find_most_similar_sql_example( - database_id, table_id_list, question, use_llm_enhancements=False, table_choose_cnt=2, sql_example_choose_cnt=10, - topk=5): - try: - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - except Exception as e: - logging.error(f'数据库{database_id}信息获取失败由于{e}') - return [] - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - del database_url - try: - question_vector = await Vectorize.vectorize_embedding(question) - except Exception as e: - logging.error(f'问题向量化失败由于:{e}') - return {} - sql_example = [] - data_frame_list = [] - if table_id_list is None: - if 
use_llm_enhancements: - table_id_list = await SqlGenerateService.get_most_similar_table_id_list(database_id, question, table_choose_cnt) - else: - try: - table_info_list = await TableInfoManager.get_table_info_by_database_id(database_id) - table_id_list = [] - for table_info in table_info_list: - table_id_list.append(table_info['table_id']) - max_retry = 3 - sql_example_list = [] - for _ in range(max_retry): - try: - sql_example_list = await asyncio.wait_for(SqlExampleManager.get_topk_sql_example_by_cos_dis( - question_vector=question_vector, - table_id_list=table_id_list, topk=table_choose_cnt * 2), - timeout=5 - ) - break - except Exception as e: - logging.error(f'非增强模式下,sql_example获取失败:{e}') - table_id_list = [] - for sql_example in sql_example_list: - table_id_list.append(sql_example['table_id']) - except Exception as e: - logging.error(f'非增强模式下,表id获取失败由于:{e}') - return [] - table_id_list = list(set(table_id_list)) - if len(table_id_list) < table_choose_cnt: - try: - expand_table_id_list = await asyncio.wait_for(TableInfoManager.get_topk_table_by_cos_dis( - database_id, question_vector, table_choose_cnt - len(table_id_list)), timeout=5 - ) - table_id_list += expand_table_id_list - except Exception as e: - logging.error(f'非增强模式下,表id补充失败由于:{e}') - exist_table_id = set() - note_list = [] - for i in range(min(2, len(table_id_list))): - table_id = table_id_list[i] - if table_id in exist_table_id: - continue - exist_table_id.add(table_id) - try: - table_info = await TableInfoManager.get_table_info_by_table_id(table_id) - column_info_list = await ColumnInfoManager.get_column_info_by_table_id(table_id) - except Exception as e: - logging.error(f'表{table_id}注释获取失败由于{e}') - continue - note = await SqlGenerateService.merge_table_and_column_info(table_info, column_info_list) - note_list.append(note) - max_retry = 3 - sql_example_list = [] - for _ in range(max_retry): - try: - sql_example_list = await asyncio.wait_for(SqlExampleManager.get_topk_sql_example_by_cos_dis( - 
question_vector, - table_id_list=[table_id], - topk=sql_example_choose_cnt), - timeout=5 - ) - break - except Exception as e: - logging.error(f'获取id为{table_id}的表的最相近的{topk}条sql案例失败由于:{e}') - question_sql_list = [] - for i in range(len(sql_example_list)): - question_sql_list.append( - {'question': sql_example_list[i]['question'], - 'sql': sql_example_list[i]['sql']}) - data_frame_list.append({'table_id': table_id, 'table_info': table_info, - 'column_info_list': column_info_list, 'sql_example_list': question_sql_list}) - return data_frame_list - - @staticmethod - async def merge_sql_example(sql_example_list): - sql_example = '' - for i in range(len(sql_example_list)): - sql_example += '问题'+str(i)+':\n'+sql_example_list[i].get('question', - '')+'\nsql'+str(i)+':\n'+sql_example_list[i].get('sql', '')+'\n' - return sql_example - - @staticmethod - async def extract_select_statements(sql_string): - pattern = r"(?i)select[^;]*;" - matches = re.findall(pattern, sql_string) - if len(matches) == 0: - return '' - sql = matches[0] - sql = sql.strip() - sql.replace(',', ',') - return sql - - @staticmethod - async def generate_sql_base_on_example( - database_id, question, table_id_list=None, sql_generate_cnt=1, use_llm_enhancements=False): - try: - database_url = await DatabaseInfoManager.get_database_url_by_id(database_id) - except Exception as e: - logging.error(f'数据库{database_id}信息获取失败由于{e}') - return {} - if database_url is None: - raise Exception('数据库配置不存在') - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - data_frame_list = await SqlGenerateService.find_most_similar_sql_example(database_id, table_id_list, question, use_llm_enhancements) - try: - with open('./chat2db/templetes/prompt.yaml', 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - llm = LLM(model_name=config['LLM_MODEL'], - openai_api_base=config['LLM_URL'], - openai_api_key=config['LLM_KEY'], - max_tokens=config['LLM_MAX_TOKENS'], - 
request_timeout=60, - temperature=0.5) - results = [] - for data_frame in data_frame_list: - prompt = prompt_dict.get('sql_generate_base_on_example_prompt', '') - table_info = data_frame.get('table_info', '') - table_id = table_info['table_id'] - column_info_list = data_frame.get('column_info_list', '') - note = await SqlGenerateService.merge_table_and_column_info(table_info, column_info_list) - sql_example = await SqlGenerateService.merge_sql_example(data_frame.get('sql_example_list', [])) - try: - prompt = prompt.format( - database_url=database_url, note=note, k=len(data_frame.get('sql_example_list', [])), - sql_example=sql_example, question=question) - except Exception as e: - logging.info(f'sql生成失败{e}') - return [] - ge_cnt = 0 - ge_sql_cnt = 0 - while ge_cnt < 10*sql_generate_cnt and ge_sql_cnt < sql_generate_cnt: - sql = await llm.chat_with_model(prompt, f'请输出一条在与{database_type}下能运行的sql,以分号结尾') - sql = await SqlGenerateService.extract_select_statements(sql) - if len(sql): - ge_sql_cnt += 1 - tmp_dict = {'database_id': database_id, 'table_id': table_id, 'sql': sql} - results.append(tmp_dict) - ge_cnt += 1 - if len(results) == sql_generate_cnt: - break - except Exception as e: - logging.error(f'sql生成失败由于:{e}') - return results - - @staticmethod - async def generate_sql_base_on_data(database_url, table_name, sql_var=False): - database_type = None - database_type = DiffDatabaseService.get_database_type_from_url(database_url) - flag = await DiffDatabaseService.get_database_service(database_type).test_database_connection(database_url) - if not flag: - return None - table_name_list = await DiffDatabaseService.get_database_service(database_type).get_all_table_name_from_database_url(database_url) - if table_name not in table_name_list: - return None - table_info = await DiffDatabaseService.get_database_service(database_type).get_table_info(database_url, table_name) - column_info_list = await 
DiffDatabaseService.get_database_service(database_type).get_column_info(database_url, table_name) - note = await SqlGenerateService.merge_table_and_column_info(table_info, column_info_list) - - def count_char(str, char): - return sum(1 for c in str if c == char) - llm = LLM(model_name=config['LLM_MODEL'], - openai_api_base=config['LLM_URL'], - openai_api_key=config['LLM_KEY'], - max_tokens=config['LLM_MAX_TOKENS'], - request_timeout=60, - temperature=0.5) - for i in range(5): - data_frame = await DiffDatabaseService.get_database_service(database_type).get_rand_data(database_url, table_name) - try: - with open('./chat2db/templetes/prompt.yaml', 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - prompt = prompt_dict['question_generate_base_on_data_prompt'].format( - note=note, data_frame=data_frame) - question = await llm.chat_with_model(prompt, '请输出一个问题') - if count_char(question, '?') > 1 or count_char(question, '?') > 1: - continue - except Exception as e: - logging.error(f'问题生成失败由于{e}') - continue - try: - with open('./chat2db/templetes/prompt.yaml', 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - prompt = prompt_dict['sql_generate_base_on_data_prompt'].format( - database_type=database_type, - note=note, data_frame=data_frame, question=question) - sql = await llm.chat_with_model(prompt, f'请输出一条可以用于查询{database_type}的sql,要以分号结尾') - sql = await SqlGenerateService.extract_select_statements(sql) - if not sql: - continue - except Exception as e: - logging.error(f'sql生成失败由于{e}') - continue - try: - if sql_var: - await DiffDatabaseService.get_database_service(database_type).try_excute(database_url, sql) - except Exception as e: - logging.error(f'生成的sql执行失败由于{e}') - continue - return { - 'question': question, - 'sql': sql - } - return None - - @staticmethod - async def repair_sql(database_type, table_info, column_info_list, sql_failed, sql_failed_message, question): - try: - with 
open('./chat2db/templetes/prompt.yaml', 'r', encoding='utf-8') as f: - prompt_dict = yaml.load(f, Loader=yaml.SafeLoader) - llm = LLM(model_name=config['LLM_MODEL'], - openai_api_base=config['LLM_URL'], - openai_api_key=config['LLM_KEY'], - max_tokens=config['LLM_MAX_TOKENS'], - request_timeout=60, - temperature=0.5) - try: - note = await SqlGenerateService.merge_table_and_column_info(table_info, column_info_list) - prompt = prompt_dict.get('sql_expand_prompt', '') - prompt = prompt.format( - database_type=database_type, note=note, sql_failed=sql_failed, - sql_failed_message=sql_failed_message, - question=question) - except Exception as e: - logging.error(f'sql修复失败由于{e}') - return '' - sql = await llm.chat_with_model(prompt, f'请输出一条在与{database_type}下能运行的sql,要以分号结尾') - sql = await SqlGenerateService.extract_select_statements(sql) - logging.info(f"修复前的sql为{sql_failed}修复后的sql为{sql}") - except Exception as e: - logging.error(f'sql生成失败由于:{e}') - return '' - return sql diff --git a/chat2db/common/.env.example b/chat2db/common/.env.example deleted file mode 100644 index 99cdc07..0000000 --- a/chat2db/common/.env.example +++ /dev/null @@ -1,27 +0,0 @@ -# FastAPI -UVICORN_IP= -UVICORN_PORT= -# SSL_CERTFILE= -# SSL_KEYFILE= -# SSL_ENABLE= - -# Postgres -DATABASE_URL= - -# QWEN -LLM_KEY= -LLM_URL= -LLM_MODEL= -LLM_MAX_TOKENS= - - -# Vectorize -EMBEDDING_TYPE = -EMBEDDING_API_KEY = -EMBEDDING_ENDPOINT = -EMBEDDING_MODEL_NAME = - -# security -HALF_KEY1= -HALF_KEY2= -HALF_KEY3= \ No newline at end of file diff --git a/chat2db/common/init_sql_example.py b/chat2db/common/init_sql_example.py deleted file mode 100644 index 800f171..0000000 --- a/chat2db/common/init_sql_example.py +++ /dev/null @@ -1,107 +0,0 @@ -import yaml -from fastapi import status -import requests -import uuid -from typing import Optional -from pydantic import BaseModel, Field -from chat2db.config.config import config -ip = config['UVICORN_IP'] -port = config['UVICORN_PORT'] -base_url = f'http://{ip}:{port}' 
-database_url = config['DATABASE_URL'] - - -class DatabaseDelRequest(BaseModel): - database_id: Optional[str] = Field(default=None, description="数据库id") - database_url: Optional[str] = Field(default=None, description="数据库url") - - -def del_database_url(base_url, database_url): - server_url = f'{base_url}/database/del' - try: - request_data = DatabaseDelRequest(database_url=database_url).dict() - response = requests.post(server_url, json=request_data) - if response.json()['code'] != status.HTTP_200_OK: - print(response.json()['message']) - except Exception as e: - print(f"删除数据库配置失败: {e}") - exit(0) - return None - - -class DatabaseAddRequest(BaseModel): - database_url: str - - -def add_database_url(base_url, database_url): - server_url = f'{base_url}/database/add' - try: - request_data = DatabaseAddRequest(database_url=database_url).dict() - - response = requests.post(server_url, json=request_data) - response.raise_for_status() - if response.json()['code'] != status.HTTP_200_OK: - raise Exception(response.json()['message']) - except Exception as e: - print(f"增加数据库配置失败: {e}") - exit(0) - return response.json()['result']['database_id'] - - -class TableAddRequest(BaseModel): - database_id: str - table_name: str - - -def add_table(base_url, database_id, table_name): - server_url = f'{base_url}/table/add' - try: - request_data = TableAddRequest(database_id=database_id, table_name=table_name).dict() - response = requests.post(server_url, json=request_data) - response.raise_for_status() - if response.json()['code'] != status.HTTP_200_OK: - raise Exception(response.json()['message']) - except Exception as e: - print(f"增加表配置失败: {e}") - return - return response.json()['result']['table_id'] - - -class SqlExampleAddRequest(BaseModel): - table_id: str - question: str - sql: str - - -def add_sql_example(base_url, table_id, question, sql): - server_url = f'{base_url}/sql/example/add' - try: - request_data = SqlExampleAddRequest(table_id=table_id, question=question, sql=sql).dict() 
- response = requests.post(server_url, json=request_data) - if response.json()['code'] != status.HTTP_200_OK: - raise Exception(response.json()['message']) - except Exception as e: - print(f"增加sql案例失败: {e}") - return - return response.json()['result']['sql_example_id'] - - -database_id = del_database_url(base_url, database_url) -database_id = add_database_url(base_url, database_url) -with open('./chat2db/common/table_name.yaml') as f: - table_name_list = yaml.load(f, Loader=yaml.SafeLoader) -table_name_id = {} -for table_name in table_name_list: - table_id = add_table(base_url, database_id, table_name) - if table_id: - table_name_id[table_name] = table_id -with open('./chat2db/common/table_name_sql_exmple.yaml') as f: - table_name_sql_example_list = yaml.load(f, Loader=yaml.SafeLoader) -for table_name_sql_example in table_name_sql_example_list: - table_name = table_name_sql_example['table_name'] - if table_name not in table_name_id: - continue - table_id = table_name_id[table_name] - sql_example_list = table_name_sql_example['sql_example_list'] - for sql_example in sql_example_list: - add_sql_example(base_url, table_id, sql_example['question'], sql_example['sql']) diff --git a/chat2db/common/table_name.yaml b/chat2db/common/table_name.yaml deleted file mode 100644 index 553cf1b..0000000 --- a/chat2db/common/table_name.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- oe_community_openeuler_version -- oe_community_organization_structure -- oe_compatibility_card -- oe_compatibility_commercial_software -- oe_compatibility_cve_database -- oe_compatibility_oepkgs -- oe_compatibility_osv -- oe_compatibility_overall_unit -- oe_compatibility_security_notice -- oe_compatibility_solution diff --git a/chat2db/common/table_name_sql_exmple.yaml b/chat2db/common/table_name_sql_exmple.yaml deleted file mode 100644 index 8e87a11..0000000 --- a/chat2db/common/table_name_sql_exmple.yaml +++ /dev/null @@ -1,490 +0,0 @@ -- keyword_list: - - test_organization - - product_name - - company_name - 
sql_example_list: - - question: openEuler支持的哪些商业软件在江苏鲲鹏&欧拉生态创新中心测试通过 - sql: SELECT product_name, product_version, openeuler_version FROM public.oe_compatibility_commercial_software - WHERE test_organization ILIKE '%江苏鲲鹏&欧拉生态创新中心%'; - - question: 哪个版本的openEuler支持的商业软件最多 - sql: SELECT openeuler_version, COUNT(*) AS software_count FROM public.oe_compatibility_commercial_software GROUP - BY openeuler_version ORDER BY software_count DESC LIMIT 1; - - question: openEuler支持测试商业软件的机构有哪些? - sql: SELECT DISTINCT test_organization FROM public.oe_compatibility_commercial_software; - - question: openEuler支持的商业软件有哪些类别 - sql: SELECT DISTINCT "type" FROM public.oe_compatibility_commercial_software; - - question: openEuler有哪些虚拟化类别的商业软件 - sql: SELECT product_name FROM public.oe_compatibility_commercial_software WHERE - "type" ILIKE '%虚拟化%'; - - question: openEuler支持哪些ISV商业软件呢,请列出10个 - sql: SELECT product_name FROM public.oe_compatibility_commercial_software; - - question: openEuler支持的适配Kunpeng 920的互联网商业软件有哪些? - sql: SELECT product_name, openeuler_version,platform_type_and_server_model FROM - public.oe_compatibility_commercial_software WHERE platform_type_and_server_model - ILIKE '%Kunpeng 920%' AND "type" ILIKE '%互联网%' limit 30; - - question: openEuler-22.03版本支持哪些商业软件? 
- sql: SELECT product_name, openeuler_version FROM oe_compatibility_commercial_software - WHERE openeuler_version ILIKE '%22.03%'; - - question: openEuler支持的数字政府类型的商业软件有哪些 - sql: SELECT product_name, product_version FROM oe_compatibility_commercial_software - WHERE type ILIKE '%数字政府%'; - - question: 有哪些商业软件支持超过一种服务器平台 - sql: SELECT product_name FROM public.oe_compatibility_commercial_software WHERE - platform_type_and_server_model ILIKE '%Intel%' AND platform_type_and_server_model - ILIKE '%Kunpeng%'; - - question: 每个openEuler版本有多少种类型的商业软件支持 - sql: SELECT openeuler_version, COUNT(DISTINCT type) AS type_count FROM public.oe_compatibility_commercial_software GROUP - BY openeuler_version; - - question: openEuler支持的哪些商业ISV在江苏鲲鹏&欧拉生态创新中心测试通过 - sql: SELECT product_name, product_version, openeuler_version FROM public.oe_compatibility_commercial_software - WHERE test_organization ILIKE '%江苏鲲鹏&欧拉生态创新中心%'; - - question: 哪个版本的openEuler支持的商业ISV最多 - sql: SELECT openeuler_version, COUNT(*) AS software_count FROM public.oe_compatibility_commercial_software GROUP - BY openeuler_version ORDER BY software_count DESC LIMIT 1; - - question: openEuler支持测试商业ISV的机构有哪些? - sql: SELECT DISTINCT test_organization FROM public.oe_compatibility_commercial_software; - - question: openEuler支持的商业ISV有哪些类别 - sql: SELECT DISTINCT "type" FROM public.oe_compatibility_commercial_software; - - question: openEuler有哪些虚拟化类别的商业ISV - sql: SELECT product_name FROM public.oe_compatibility_commercial_software WHERE - "type" ILIKE '%虚拟化%'; - - question: openEuler支持哪些ISV商业ISV呢,请列出10个 - sql: SELECT product_name FROM public.oe_compatibility_commercial_software; - - question: openEuler支持的适配Kunpeng 920的互联网商业ISV有哪些? - sql: SELECT product_name, openeuler_version,platform_type_and_server_model FROM - public.oe_compatibility_commercial_software WHERE platform_type_and_server_model - ILIKE '%Kunpeng 920%' AND "type" ILIKE '%互联网%' limit 30; - - question: openEuler-22.03版本支持哪些商业ISV? 
- sql: SELECT product_name, openeuler_version FROM oe_compatibility_commercial_software - WHERE openeuler_version ILIKE '%22.03%'; - - question: openEuler支持的数字政府类型的商业ISV有哪些 - sql: SELECT product_name, product_version FROM oe_compatibility_commercial_software - WHERE type ILIKE '%数字政府%'; - - question: 有哪些商业ISV支持超过一种服务器平台 - sql: SELECT product_name FROM public.oe_compatibility_commercial_software WHERE - platform_type_and_server_model ILIKE '%Intel%' AND platform_type_and_server_model - ILIKE '%Kunpeng%'; - - question: 每个openEuler版本有多少种类型的商业ISV支持 - sql: SELECT openeuler_version, COUNT(DISTINCT type) AS type_count FROM public.oe_compatibility_commercial_software GROUP - BY openeuler_version; - - question: 卓智校园网接入门户系统基于openeuelr的什么版本? - sql: select * from oe_compatibility_commercial_software where product_name ilike - '%卓智校园网接入门户系统%'; - table_name: oe_compatibility_commercial_software -- keyword_list: - - softwareName - sql_example_list: - - question: openEuler-20.03-LTS-SP1支持哪些开源软件? - sql: SELECT DISTINCT openeuler_version,"softwareName" FROM public.oe_compatibility_open_source_software WHERE - openeuler_version ILIKE '%20.03-LTS-SP1%'; - - question: openEuler的aarch64下支持开源软件 - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software WHERE - "arch" ILIKE '%aarch64%'; - - question: openEuler支持开源软件使用了GPLv2+许可证 - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software WHERE - "license" ILIKE '%GPLv2+%'; - - question: tcplay支持的架构是什么 - sql: SELECT "arch" FROM public.oe_compatibility_open_source_software WHERE "softwareName" - ILIKE '%tcplay%'; - - question: openEuler支持哪些开源软件,请列出10个 - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software LIMIT - 10; - - question: openEuler支持开源软件支持哪些结构 - sql: SELECT "arch" FROM public.oe_compatibility_open_source_software group by - "arch"; - - question: openEuler支持多少个开源软件? 
- sql: select tmp_table.openeuler_version,count(*) as open_source_software_cnt from - (select DISTINCT openeuler_version,"softwareName" from oe_compatibility_open_source_software) - as tmp_table group by tmp_table.openeuler_version; - - question: openEuler-20.03-LTS-SP1支持哪些开源ISV? - sql: SELECT DISTINCT openeuler_version,"softwareName" FROM public.oe_compatibility_open_source_software WHERE - openeuler_version ILIKE '%20.03-LTS-SP1%'; - - question: openEuler的aarch64下支持开源ISV - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software WHERE - "arch" ILIKE '%aarch64%'; - - question: openEuler支持开源ISV使用了GPLv2+许可证 - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software WHERE - "license" ILIKE '%GPLv2+%'; - - question: tcplay支持的架构是什么 - sql: SELECT "arch" FROM public.oe_compatibility_open_source_software WHERE "softwareName" - ILIKE '%tcplay%'; - - question: openEuler支持哪些开源ISV,请列出10个 - sql: SELECT "softwareName" FROM public.oe_compatibility_open_source_software LIMIT - 10; - - question: openEuler支持开源ISV支持哪些结构 - sql: SELECT "arch" FROM public.oe_compatibility_open_source_software group by - "arch"; - - question: openEuler-20.03-LTS-SP1支持多少个开源ISV? - sql: select tmp_table.openeuler_version,count(*) as open_source_software_cnt from - (select DISTINCT openeuler_version,"softwareName" from oe_compatibility_open_source_software - where openeuler_version ilike 'openEuler-20.03-LTS-SP1') as tmp_table group - by tmp_table.openeuler_version; - - question: openEuler支持多少个开源ISV? 
- sql: select tmp_table.openeuler_version,count(*) as open_source_software_cnt from - (select DISTINCT openeuler_version,"softwareName" from oe_compatibility_open_source_software) - as tmp_table group by tmp_table.openeuler_version; - table_name: oe_compatibility_open_source_software -- keyword_list: [] - sql_example_list: - - question: 在openEuler技术委员会担任委员的人有哪些 - sql: SELECT name FROM oe_community_organization_structure WHERE committee_name - ILIKE '%技术委员会%' AND role = '委员'; - - question: openEuler的委员会中哪些人是教授 - sql: SELECT name FROM oe_community_organization_structure WHERE personal_message - ILIKE '%教授%'; - - question: openEuler各委员会中担任主席有多少个? - sql: SELECT committee_name, COUNT(*) FROM oe_community_organization_structure - WHERE role = '主席' GROUP BY committee_name; - - question: openEuler 用户委员会中有多少位成员 - sql: SELECT count(*) FROM oe_community_organization_structure WHERE committee_name - ILIKE '%用户委员会%'; - - question: openEuler 技术委员会有多少位成员 - sql: SELECT count(*) FROM oe_community_organization_structure WHERE committee_name - ILIKE '%技术委员会%'; - - question: openEuler委员会的委员常务委员会委员有哪些人 - sql: SELECT name FROM oe_community_organization_structure WHERE committee_name - ILIKE '%委员会%' AND role ILIKE '%常务委员会委员%'; - - question: openEuler委员会有哪些人属于华为技术有限公司? - sql: SELECT DISTINCT name FROM oe_community_organization_structure WHERE personal_message - ILIKE '%华为技术有限公司%'; - - question: openEuler每个委员会有多少人? - sql: SELECT committee_name, COUNT(*) FROM oe_community_organization_structure - GROUP BY committee_name; - - question: openEuler的执行总监是谁 - sql: SELECT name FROM oe_community_organization_structure WHERE role = '执行总监'; - - question: openEuler委员会有哪些组织? - sql: SELECT DISTINCT committee_name from oe_community_organization_structure; - - question: openEuler技术委员会的主席是谁? - sql: SELECT committee_name,name FROM oe_community_organization_structure WHERE - role = '主席' and committee_name ilike '%技术委员会%'; - - question: openEuler品牌委员会的主席是谁? 
- sql: SELECT committee_name,name FROM oe_community_organization_structure WHERE - role = '主席' and committee_name ilike '%品牌委员会%'; - - question: openEuler委员会的主席是谁? - sql: SELECT committee_name,name FROM oe_community_organization_structure WHERE - role = '主席' and committee_name ilike '%openEuler 委员会%'; - - question: openEuler委员会的执行总监是谁? - sql: SELECT committee_name,name FROM oe_community_organization_structure WHERE - role = '执行总监' and committee_name ilike '%openEuler 委员会%'; - - question: openEuler委员会的执行秘书是谁? - sql: SELECT committee_name,name FROM oe_community_organization_structure WHERE - role = '执行秘书' and committee_name ilike '%openEuler 委员会%'; - table_name: oe_community_organization_structure -- keyword_list: - - cve_id - sql_example_list: - - question: 安全公告openEuler-SA-2024-2059的详细信息在哪里? - sql: select DISTINCT security_notice_no,details from oe_compatibility_security_notice - where security_notice_no='openEuler-SA-2024-2059'; - table_name: oe_compatibility_security_notice -- keyword_list: - - hardware_model - sql_example_list: - - question: openEuler-22.03 LTS支持哪些整机? - sql: SELECT main_board_model, cpu, ram FROM oe_compatibility_overall_unit WHERE - openeuler_version ILIKE '%openEuler-22.03-LTS%'; - - question: 查询所有支持`openEuler-22.09`,并且提供详细产品介绍链接的整机型号和它们的内存配置? - sql: SELECT hardware_model, ram FROM oe_compatibility_overall_unit WHERE openeuler_version - ILIKE '%openEuler-22.09%' AND product_information IS NOT NULL; - - question: 显示所有由新华三生产,支持`openEuler-20.03 LTS SP2`版本的整机,列出它们的型号和架构类型 - sql: SELECT hardware_model, architecture FROM oe_compatibility_overall_unit WHERE - hardware_factory = '新华三' AND openeuler_version ILIKE '%openEuler-20.03 LTS SP2%'; - - question: openEuler支持多少种整机? - sql: SELECT count(DISTINCT main_board_model) FROM oe_compatibility_overall_unit; - - question: openEuler每个版本支持多少种整机? 
- sql: select openeuler_version,count(*) from (SELECT DISTINCT openeuler_version,main_board_model - FROM oe_compatibility_overall_unit) as tmp_table group by openeuler_version; - - question: openEuler每个版本多少种架构的整机? - sql: select openeuler_version,architecture,count(*) from (SELECT DISTINCT openeuler_version,architecture,main_board_model - FROM oe_compatibility_overall_unit) as tmp_table group by openeuler_version,architecture; - table_name: oe_compatibility_overall_unit -- keyword_list: - - osv_name - - os_version - sql_example_list: - - question: 深圳开鸿数字产业发展有限公司基于openEuler的什么版本发行了什么商用版本? - sql: select os_version,openeuler_version,os_download_link from oe_compatibility_osv - where osv_name='深圳开鸿数字产业发展有限公司'; - - question: 统计各个openEuler版本下的商用操作系统数量 - sql: SELECT openeuler_version, COUNT(*) AS os_count FROM public.oe_compatibility_osv GROUP - BY openeuler_version; - - question: 哪个OS厂商基于openEuler发布的商用操作系统最多 - sql: SELECT osv_name, COUNT(*) AS os_count FROM public.oe_compatibility_osv GROUP - BY osv_name ORDER BY os_count DESC LIMIT 1; - - question: 不同OS厂商基于openEuler发布不同架构的商用操作系统数量是多少? 
- sql: SELECT arch, osv_name, COUNT(*) AS os_count FROM public.oe_compatibility_osv GROUP - BY arch, osv_name ORDER BY arch, os_count DESC; - - question: 深圳开鸿数字产业发展有限公司的商用操作系统是基于什么openEuler版本发布的 - sql: SELECT os_version, openeuler_version FROM public.oe_compatibility_osv WHERE - osv_name ILIKE '%深圳开鸿数字产业发展有限公司%'; - - question: openEuler有哪些OSV伙伴 - sql: SELECT DISTINCT osv_name FROM public.oe_compatibility_osv; - - question: 有哪些OSV友商的操作系统是x86_64架构的 - sql: SELECT osv_name, os_version FROM public.oe_compatibility_osv WHERE arch ILIKE - '%x86_64%'; - - question: 哪些OSV友商操作系统是嵌入式类型的 - sql: SELECT osv_name, os_version,openeuler_version FROM public.oe_compatibility_osv - WHERE type ILIKE '%嵌入式%'; - - question: 成都鼎桥的商用操作系统版本是基于openEuler 22.03的版本吗 - sql: SELECT osv_name, os_version,"openeuler_version" FROM public.oe_compatibility_osv WHERE - osv_name ILIKE '%成都鼎桥通信技术有限公司%' AND openeuler_version ILIKE '%22.03%'; - - question: 最近发布的基于openEuler 23.09的商用系统有哪些 - sql: SELECT osv_name, os_version,"openeuler_version" FROM public.oe_compatibility_osv WHERE - openeuler_version ILIKE '%23.09%' ORDER BY date DESC limit 10; - - question: 帮我查下成都智明达发布的所有嵌入式系统 - sql: SELECT osv_name, os_version,"openeuler_version" FROM public.oe_compatibility_osv WHERE - osv_name ILIKE '%成都智明达电子股份有限公司%' AND type = '嵌入式'; - - question: 基于openEuler发布的商用操作系统有哪些类型 - sql: SELECT DISTINCT type FROM public.oe_compatibility_osv; - - question: 江苏润和系统版本HopeOS-V22-x86_64-dvd.iso基于openEuler哪个版本 - sql: SELECT DISTINCT osv_name, os_version,"openeuler_version" FROM public.oe_compatibility_osv - WHERE "osv_name" ILIKE '%江苏润和%' AND os_version ILIKE '%HopeOS-V22-x86_64-dvd.iso%' - ; - - question: 浙江大华DH-IVSS-OSV-22.03-LTS-SP2-x86_64-dvd.iso系统版本基于openEuler哪个版本 - sql: SELECT DISTINCT osv_name, os_version,"openeuler_version" FROM public.oe_compatibility_osv - WHERE "osv_name" ILIKE '%浙江大华%' AND os_version ILIKE '%DH-IVSS-OSV-22.03-LTS-SP2-x86_64-dvd.iso%' - ; - table_name: oe_compatibility_osv -- keyword_list: - - board_model - 
- chip_model - - chip_vendor - - product - sql_example_list: - - question: openEuler 22.03支持哪些网络接口卡型号? - sql: SELECT board_model, chip_model,type FROM oe_compatibility_card WHERE type - ILIKE '%NIC%' AND openeuler_version ILIKE '%22.03%' limit 30; - - question: 请列出openEuler支持的所有Renesas公司的密码卡 - sql: SELECT * FROM oe_compatibility_card WHERE chip_vendor ILIKE '%Renesas%' AND - type ILIKE '%密码卡%' limit 30; - - question: openEuler各种架构支持的板卡数量是多少 - sql: SELECT architecture, COUNT(*) AS total_cards FROM oe_compatibility_card GROUP - BY architecture limit 30; - - question: 每个openEuler版本支持了多少种板卡 - sql: SELECT openeuler_version, COUNT(*) AS number_of_cards FROM oe_compatibility_card - GROUP BY openeuler_version limit 30; - - question: openEuler总共支持多少种不同的板卡型号 - sql: SELECT COUNT(DISTINCT board_model) AS board_model_cnt FROM oe_compatibility_card - limit 30; - - question: openEuler支持的GPU型号有哪些? - sql: SELECT chip_model, openeuler_version,type FROM public.oe_compatibility_card WHERE - type ILIKE '%GPU%' ORDER BY driver_date DESC limit 30; - - question: openEuler 20.03 LTS-SP4版本支持哪些类型的设备 - sql: SELECT DISTINCT openeuler_version,type FROM public.oe_compatibility_card WHERE - openeuler_version ILIKE '%20.03-LTS-SP4%' limit 30; - - question: openEuler支持的板卡驱动在2023年后发布 - sql: SELECT board_model, driver_date, driver_name FROM oe_compatibility_card WHERE - driver_date >= '2023-01-01' limit 30; - - question: 给些支持openEuler的aarch64架构下支持的的板卡的驱动下载链接 - sql: SELECT openeuler_version,board_model, download_link FROM oe_compatibility_card - WHERE architecture ILIKE '%aarch64%' AND download_link IS NOT NULL limit 30; - - question: openEuler-22.03-LTS-SP1支持的存储卡有哪些? - sql: SELECT openeuler_version,board_model, chip_model,type FROM oe_compatibility_card - WHERE type ILIKE '%SSD%' AND openeuler_version ILIKE '%openEuler-22.03-LTS-SP1%' - limit 30; - table_name: oe_compatibility_card -- keyword_list: - - cve_id - sql_example_list: - - question: CVE-2024-41053的详细信息在哪里可以看到? 
- sql: select DISTINCT cve_id,details from oe_compatibility_cve_database where cve_id='CVE-2024-41053'; - - question: CVE-2024-41053是个怎么样的漏洞? - sql: select DISTINCT cve_id,summary from oe_compatibility_cve_database where cve_id='CVE-2024-41053'; - - question: CVE-2024-41053影响了哪些包? - sql: select DISTINCT cve_id,package_name from oe_compatibility_cve_database where - cve_id='CVE-2024-41053'; - - question: CVE-2024-41053的cvss评分是多少? - sql: select DISTINCT cve_id,cvsss_core_nvd from oe_compatibility_cve_database - where cve_id='CVE-2024-41053'; - - question: CVE-2024-41053现在修复了么? - sql: select DISTINCT cve_id, status from oe_compatibility_cve_database where cve_id='CVE-2024-41053'; - - question: CVE-2024-41053影响了openEuler哪些版本? - sql: select DISTINCT cve_id, affected_product from oe_compatibility_cve_database - where cve_id='CVE-2024-41053'; - - question: CVE-2024-41053发布时间是? - sql: select DISTINCT cve_id, announcement_time from oe_compatibility_cve_database - where cve_id='CVE-2024-41053'; - - question: openEuler-20.03-LTS-SP4在2024年8月发布哪些漏洞? - sql: select DISTINCT affected_product,cve_id,announcement_time from oe_compatibility_cve_database - where cve_id='CVE-2024-41053' and affected_product='openEuler-20.03-LTS-SP4' - and EXTRACT(MONTH FROM announcement_time)=8; - - question: openEuler-20.03-LTS-SP4在2024年发布哪些漏洞? - sql: select DISTINCT affected_product,cve_id,announcement_time from oe_compatibility_cve_database - where cve_id='CVE-2024-41053' and affected_product='openEuler-20.03-LTS-SP4' - and EXTRACT(YEAR FROM announcement_time)=2024; - - question: CVE-2024-41053的威胁程度是怎样的? - sql: select DISTINCT affected_product,cve_id,cvsss_core_nvd,attack_complexity_nvd,attack_complexity_oe,attack_vector_nvd,attack_vector_oe - from oe_compatibility_cve_database where cve_id='CVE-2024-41053'; - table_name: oe_compatibility_cve_database -- keyword_list: - - name - sql_example_list: - - question: openEuler-20.03-LTS的非官方软件包有多少个? 
- sql: SELECT COUNT(*) FROM oe_compatibility_oepkgs WHERE repotype = 'openeuler_compatible' - AND openeuler_version ILIKE '%openEuler-20.03-LTS%'; - - question: openEuler支持的nginx版本有哪些? - sql: SELECT DISTINCT name,version, srcrpmpackurl FROM oe_compatibility_oepkgs - WHERE name ILIKE 'nginx'; - - question: openEuler的支持哪些架构的glibc? - sql: SELECT DISTINCT name,arch FROM oe_compatibility_oepkgs WHERE name ILIKE 'glibc'; - - question: openEuler-22.03-LTS带GPLv2许可的软件包有哪些 - sql: SELECT name,rpmlicense FROM oe_compatibility_oepkgs WHERE openeuler_version - ILIKE '%openEuler-22.03-LTS%' AND rpmlicense = 'GPLv2'; - - question: openEuler支持的python3这个软件包是用来干什么的? - sql: SELECT DISTINCT name,summary FROM oe_compatibility_oepkgs WHERE name ILIKE - 'python3'; - - question: 哪些版本的openEuler的zlib中有官方源的? - sql: SELECT DISTINCT openeuler_version,name,version FROM oe_compatibility_oepkgs - WHERE name ILIKE '%zlib%' AND repotype = 'openeuler_official'; - - question: 请以表格的形式提供openEuler-20.09的gcc软件包的下载链接 - sql: SELECT DISTINCT openeuler_version,name, rpmpackurl FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'gcc'; - - question: 请以表格的形式提供openEuler-20.09的glibc软件包的下载链接 - sql: SELECT DISTINCT openeuler_version,name, rpmpackurl FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'glibc'; - - question: 请以表格的形式提供openEuler-20.09的redis软件包的下载链接 - sql: SELECT DISTINCT openeuler_version,name, rpmpackurl FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'redis'; - - question: openEuler-20.09的支持多少个软件包? - sql: select tmp_table.openeuler_version,count(*) as oepkgs_cnt from (select DISTINCT - openeuler_version,name from oe_compatibility_oepkgs WHERE openeuler_version - ILIKE '%openEuler-20.09') as tmp_table group by tmp_table.openeuler_version; - - question: openEuler支持多少个软件包? 
- sql: select tmp_table.openeuler_version,count(*) as oepkgs_cnt from (select DISTINCT - openeuler_version,name from oe_compatibility_oepkgs) as tmp_table group by tmp_table.openeuler_version; - - question: 请以表格的形式提供openEuler-20.09的gcc的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'gcc'; - - question: 请以表格的形式提供openEuler-20.09的glibc的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'glibc'; - - question: 请以表格的形式提供openEuler-20.09的redis的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'redis'; - - question: openEuler-20.09支持哪些gcc的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'gcc'; - - question: openEuler-20.09支持哪些glibc的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'glibc'; - - question: openEuler-20.09支持哪些redis的版本 - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'redis'; - - question: '' - sql: openEuler-20.09支持的gcc版本有哪些 - - question: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'gcc'; - sql: openEuler-20.09支持的glibc版本有哪些 - - question: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'glibc'; - sql: openEuler-20.09支持的redis版本有哪些 - - question: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE 
'%openEuler-20.09%' AND name ilike 'redis'; - sql: '' - - question: openEuler-20.09支持gcc 9.3.1么? - sql: SELECT DISTINCT openeuler_version,name, version FROM oe_compatibility_oepkgs - WHERE openeuler_version ILIKE '%openEuler-20.09%' AND name ilike 'gcc' AND version - ilike '9.3.1'; - table_name: oe_compatibility_oepkgs -- keyword_list: [] - sql_example_list: - - question: openEuler社区创新版本有哪些 - sql: SELECT DISTINCT openeuler_version,version_type FROM oe_community_openeuler_version - where version_type ILIKE '%社区创新版本%'; - - question: openEuler有哪些版本 - sql: SELECT openeuler_version FROM public.oe_community_openeuler_version; - - question: 查询openeuler各版本对应的内核版本 - sql: SELECT DISTINCT openeuler_version, kernel_version FROM public.oe_community_openeuler_version; - - question: openEuler有多少个长期支持版本(LTS) - sql: SELECT COUNT(*) as publish_version_count FROM public.oe_community_openeuler_version - WHERE version_type ILIKE '%长期支持版本%'; - - question: 查询openEuler-20.03的所有SP版本 - sql: SELECT openeuler_version FROM public.oe_community_openeuler_version WHERE - openeuler_version ILIKE '%openEuler-20.03-LTS-SP%'; - - question: openEuler最新的社区创新版本内核是啥 - sql: SELECT kernel_version FROM public.oe_community_openeuler_version WHERE version_type - ILIKE '%社区创新版本%' ORDER BY publish_time DESC LIMIT 1; - - question: 最早的openEuler版本是什么时候发布的 - sql: SELECT openeuler_version,publish_time FROM public.oe_community_openeuler_version - ORDER BY publish_time ASC LIMIT 1; - - question: 最新的openEuler版本是哪个 - sql: SELECT openeuler_version,publish_time FROM public.oe_community_openeuler_version - ORDER BY publish_time LIMIT 1; - - question: openEuler有哪些版本使用了Linux 5.10.0内核 - sql: SELECT openeuler_version,kernel_version FROM public.oe_community_openeuler_version - WHERE kernel_version ILIKE '5.10.0%'; - - question: 哪个openEuler版本是最近更新的长期支持版本 - sql: SELECT openeuler_version,publish_time FROM public.oe_community_openeuler_version - WHERE version_type ILIKE '%长期支持版本%' ORDER BY publish_time DESC LIMIT 1; - - question: 
openEuler每个年份发布了多少个版本 - sql: SELECT EXTRACT(YEAR FROM publish_time) AS year, COUNT(*) AS publish_version_count - FROM oe_community_openeuler_version group by EXTRACT(YEAR FROM publish_time); - - question: openEuler-20.03-LTS版本的linux内核是多少? - sql: SELECT openeuler_version,kernel_version FROM public.oe_community_openeuler_version - WHERE openeuler_version = 'openEuler-20.03-LTS'; - - question: openEuler-20.03-LTS版本的linux内核是多少? - sql: SELECT openeuler_version,kernel_version FROM public.oe_community_openeuler_version - WHERE openeuler_version = 'openEuler-24.09'; - table_name: oe_community_openeuler_version -- keyword_list: - - product - sql_example_list: - - question: 哪些openEuler版本支持使用至强6338N的解决方案 - sql: SELECT DISTINCT openeuler_version FROM oe_compatibility_solution WHERE cpu - ILIKE '%6338N%'; - - question: 使用intel XXV710作为网卡的解决方案对应的是哪些服务器型号 - sql: SELECT DISTINCT server_model FROM oe_compatibility_solution WHERE network_card - ILIKE '%intel XXV710%'; - - question: 哪些解决方案的硬盘驱动为SATA-SSD Skhynix - sql: SELECT DISTINCT product FROM oe_compatibility_solution WHERE hard_disk_drive - ILIKE 'SATA-SSD Skhynix'; - - question: 查询所有使用6230R系列CPU且支持磁盘阵列支持PERC H740P Adapter的解决方案的产品名 - sql: SELECT DISTINCT product FROM oe_compatibility_solution WHERE cpu ILIKE '%6230R%' - AND raid ILIKE '%PERC H740P Adapter%'; - - question: R4900-G3有哪些驱动版本 - sql: SELECT DISTINCT driver FROM oe_compatibility_solution WHERE product ILIKE - '%R4900-G3%'; - - question: DL380 Gen10支持哪些架构 - sql: SELECT DISTINCT architecture FROM oe_compatibility_solution WHERE server_model - ILIKE '%DL380 Gen10%'; - - question: 列出所有使用Intel(R) Xeon(R)系列cpu且磁盘冗余阵列为LSI SAS3408的解决方案的服务器厂家 - sql: SELECT DISTINCT server_vendor FROM oe_compatibility_solution WHERE cpu ILIKE - '%Intel(R) Xeon(R)%' AND raid ILIKE '%LSI SAS3408%'; - - question: 哪些解决方案提供了针对SEAGATE ST4000NM0025硬盘驱动的支持 - sql: SELECT * FROM oe_compatibility_solution WHERE hard_disk_drive ILIKE '%SEAGATE - ST4000NM0025%'; - - question: 查询所有使用4316系列CPU的解决方案 - sql: 
SELECT * FROM oe_compatibility_solution WHERE cpu ILIKE '%4316%'; - - question: 支持openEuler-22.03-LTS-SP2版本的解决方案中,哪款服务器型号出现次数最多 - sql: SELECT server_model, COUNT(*) as count FROM oe_compatibility_solution WHERE - openeuler_version ILIKE '%openEuler-22.03-LTS-SP2%' GROUP BY server_model ORDER - BY count DESC LIMIT 1; - - question: HPE提供的解决方案的介绍链接是什么 - sql: SELECT DISTINCT introduce_link FROM oe_compatibility_solution WHERE server_vendor - ILIKE '%HPE%'; - - question: 列出所有使用intel XXV710网络卡接口的解决方案的CPU型号 - sql: SELECT DISTINCT cpu FROM oe_compatibility_solution WHERE network_card ILIKE - '%intel XXV710%'; - - question: 服务器型号为2288H V5的解决方案支持哪些不同的openEuler版本 - sql: SELECT DISTINCT openeuler_version FROM oe_compatibility_solution WHERE server_model - ILIKE '%NF5180M5%'; - - question: 使用6230R系列CPU的解决方案内存最小是多少GB - sql: SELECT MIN(ram) FROM oe_compatibility_solution WHERE cpu ILIKE '%6230R%'; - - question: 哪些解决方案的磁盘驱动为MegaRAID 9560-8i - sql: SELECT * FROM oe_compatibility_solution WHERE hard_disk_drive LIKE '%MegaRAID - 9560-8i%'; - - question: 列出所有使用6330N系列CPU且服务器厂家为Dell的解决方案的产品名 - sql: SELECT DISTINCT product FROM oe_compatibility_solution WHERE cpu ILIKE '%6330N%' - AND server_vendor ILIKE '%Dell%'; - - question: R4900-G3的驱动版本是多少 - sql: SELECT driver FROM oe_compatibility_solution WHERE product ILIKE '%R4900-G3%'; - - question: 哪些解决方案的服务器型号为2288H V7 - sql: SELECT * FROM oe_compatibility_solution WHERE server_model ILIKE '%2288H - V7%'; - - question: 使用Intel i350网卡且硬盘驱动为ST4000NM0025的解决方案的服务器厂家有哪些 - sql: SELECT DISTINCT server_vendor FROM oe_compatibility_solution WHERE network_card - ILIKE '%Intel i350%' AND hard_disk_drive ILIKE '%ST4000NM0025%'; - - question: 有多少种不同的驱动版本被用于支持openEuler-22.03-LTS-SP2版本的解决方案 - sql: SELECT COUNT(DISTINCT driver) FROM oe_compatibility_solution WHERE openeuler_version - ILIKE '%openEuler-22.03-LTS-SP2%'; - table_name: oe_compatibility_solution diff --git a/chat2db/config/config.py b/chat2db/config/config.py deleted file mode 100644 index 
a8f7bcb..0000000 --- a/chat2db/config/config.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. -import os - -from dotenv import dotenv_values -from pydantic import BaseModel, Field - - -class ConfigModel(BaseModel): - # FastAPI - UVICORN_IP: str = Field(None, description="FastAPI 服务的IP地址") - UVICORN_PORT: int = Field(None, description="FastAPI 服务的端口号") - SSL_CERTFILE: str = Field(None, description="SSL证书文件的路径") - SSL_KEYFILE: str = Field(None, description="SSL密钥文件的路径") - SSL_ENABLE: str = Field(None, description="是否启用SSL连接") - - # Postgres - DATABASE_URL: str = Field(None, description="数据库url") - - # QWEN - LLM_KEY: str = Field(None, description="语言模型访问密钥") - LLM_URL: str = Field(None, description="语言模型服务的基础URL") - LLM_MAX_TOKENS: int = Field(None, description="单次请求中允许的最大Token数") - LLM_MODEL: str = Field(None, description="使用的语言模型名称或版本") - - # Vectorize - EMBEDDING_TYPE: str = Field("openai", description="embedding 服务的类型") - EMBEDDING_API_KEY: str = Field(None, description="embedding服务api key") - EMBEDDING_ENDPOINT: str = Field(None, description="embedding服务url地址") - EMBEDDING_MODEL_NAME: str = Field(None, description="embedding模型名称") - - # security - HALF_KEY1: str = Field(None, description='加密的密钥组件1') - HALF_KEY2: str = Field(None, description='加密的密钥组件2') - HALF_KEY3: str = Field(None, description='加密的密钥组件3') - - -class Config: - config: ConfigModel - - def __init__(self): - if os.getenv("CONFIG"): - config_file = os.getenv("CONFIG") - else: - config_file = "./chat2db/common/.env" - self.config = ConfigModel(**(dotenv_values(config_file))) - if os.getenv("PROD"): - os.remove(config_file) - - def __getitem__(self, key): - if key in self.config.__dict__: - return self.config.__dict__[key] - return None - - -config = Config() \ No newline at end of file diff --git a/chat2db/database/postgres.py b/chat2db/database/postgres.py deleted file mode 100644 index c3b8bf8..0000000 --- 
a/chat2db/database/postgres.py +++ /dev/null @@ -1,126 +0,0 @@ -import logging -from uuid import uuid4 -from pgvector.sqlalchemy import Vector -from sqlalchemy.orm import sessionmaker, declarative_base -from sqlalchemy import TIMESTAMP, UUID, Column, String, Boolean, ForeignKey, create_engine, func, Index -import sys -from chat2db.config.config import config - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') -Base = declarative_base() - - -class DatabaseInfo(Base): - __tablename__ = 'database_info_table' - id = Column(UUID(), default=uuid4, primary_key=True) - encrypted_database_url = Column(String()) - encrypted_config = Column(String()) - hashmac = Column(String()) - created_at = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - - -class TableInfo(Base): - __tablename__ = 'table_info_table' - id = Column(UUID(), default=uuid4, primary_key=True) - database_id = Column(UUID(), ForeignKey('database_info_table.id', ondelete='CASCADE')) - table_name = Column(String()) - table_note = Column(String()) - table_note_vector = Column(Vector(1024)) - enable = Column(Boolean, default=False) - created_at = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_at = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - __table_args__ = ( - Index( - 'table_note_vector_index', - table_note_vector, - postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 200}, - postgresql_ops={'table_note_vector': 'vector_cosine_ops'} - ), - ) - - -class ColumnInfo(Base): - __tablename__ = 'column_info_table' - id = Column(UUID(), default=uuid4, primary_key=True) - table_id = Column(UUID(), ForeignKey('table_info_table.id', ondelete='CASCADE')) - column_name = Column(String) - column_type = Column(String) - column_note = Column(String) - 
enable = Column(Boolean, default=False) - - -class SqlExample(Base): - __tablename__ = 'sql_example_table' - id = Column(UUID(), default=uuid4, primary_key=True) - table_id = Column(UUID(), ForeignKey('table_info_table.id', ondelete='CASCADE')) - question = Column(String()) - sql = Column(String()) - question_vector = Column(Vector(1024)) - created_at = Column(TIMESTAMP(timezone=True), nullable=True, server_default=func.current_timestamp()) - updated_at = Column( - TIMESTAMP(timezone=True), - server_default=func.current_timestamp(), - onupdate=func.current_timestamp()) - __table_args__ = ( - Index( - 'question_vector_index', - question_vector, - postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 200}, - postgresql_ops={'question_vector': 'vector_cosine_ops'} - ), - ) - - -class PostgresDB: - _engine = None - - @classmethod - def get_mysql_engine(cls): - if not cls._engine: - cls.engine = create_engine( - config['DATABASE_URL'], - hide_parameters=True, - echo=False, - pool_recycle=300, - pool_pre_ping=True) - - Base.metadata.create_all(cls.engine) - if 'opengauss' in config['DATABASE_URL']: - from sqlalchemy import event - from opengauss_sqlalchemy.register_async import register_vector - @event.listens_for(cls.engine.sync_engine, "connect") - def connect(dbapi_connection, connection_record): - dbapi_connection.run_async(register_vector) - return cls._engine - - @classmethod - def get_session(cls): - connection = None - try: - connection = sessionmaker(bind=cls.engine)() - except Exception as e: - logging.error(f"Error creating a postgres sessiondue to error: {e}") - return None - return cls._ConnectionManager(connection) - - class _ConnectionManager: - def __init__(self, connection): - self.connection = connection - - def __enter__(self): - return self.connection - - def __exit__(self, exc_type, exc_val, exc_tb): - try: - self.connection.close() - except Exception as e: - logging.error(f"Postgres connection close failed due to error: {e}") - - 
-PostgresDB.get_mysql_engine() diff --git "a/chat2db/docs/chat2db\345\267\245\345\205\267\350\257\246\347\273\206\350\257\264\346\230\216.md" "b/chat2db/docs/chat2db\345\267\245\345\205\267\350\257\246\347\273\206\350\257\264\346\230\216.md" deleted file mode 100644 index e4d4758..0000000 --- "a/chat2db/docs/chat2db\345\267\245\345\205\267\350\257\246\347\273\206\350\257\264\346\230\216.md" +++ /dev/null @@ -1,391 +0,0 @@ -# 1. 背景说明 -工具聚焦于利用大模型能力智能生成SQL语句,查询数据库数据,为最终的模型拟合提供能力增强。工具可增强RAG多路召回能力,增强RAG对本地用户的数据适应性,同时对于服务器、硬件型号等关键字场景,在不训练模型的情况下,RAG也具备一定检索能力 - -# 2. 工具设计框架 -## 2.1 目录结构 -``` -chat2db -|-- app # 应用主入口及相关功能模块 -|-- |-- app.py # 服务请求入口,处理用户请求并返回结果 -|-- |-- __init__.py # 初始化 -|-- | -|-- |-- base # 基础功能模块 -|-- |-- |-- ac_automation.py # AC 自动机 -|-- |-- |-- mysql.py # MySQL 数据库操作封装 -|-- |-- |-- postgres.py # PostgreSQL 数据库操作封装 -|-- |-- |-- vectorize.py # 数据向量化处理模块 -|-- | -|-- |-- router # 路由模块,负责分发请求到具体服务 -|-- |-- |-- database.py # 数据库相关路由逻辑 -|-- |-- |-- sql_example.py # SQL 示例管理路由 -|-- |-- |-- sql_generate.py # SQL 生成相关路由 -|-- |-- |-- table.py # 表信息管理路由 -|-- | -|-- |-- service # 核心服务模块 -|-- |-- |-- diff_database_service.py # 不同数据库类型的服务适配 -|-- |-- |-- keyword_service.py # 关键字检索服务 -|-- |-- |-- sql_generate_service.py # SQL 生成服务逻辑 -| -|-- common # 公共资源及配置 -|-- |-- .env # 环境变量配置文件 -|-- |-- init_sql_example.py # 初始化 SQL 示例数据脚本 -|-- |-- table_name_id.yaml # 表名与 ID 映射配置 -|-- |-- table_name_sql_example.yaml # 表名与 SQL 示例映射配置 -| -|-- config # 配置模块 -|-- |-- config.py # 工具全局配置文件 -| -|-- database # 数据库相关模块 -|-- |-- postgres.py # PostgreSQL 数据库连接及操作封装 -| -|-- llm # 大模型交互模块 -|-- |-- chat_with_model.py # 与大模型交互的核心逻辑 -| -|-- manager # 数据管理模块 -|-- |-- column_info_manager.py # 列信息管理逻辑 -|-- |-- database_info_manager.py # 数据库信息管理逻辑 -|-- |-- sql_example_manager.py # SQL 示例管理逻辑 -|-- |-- table_info_manager.py # 表信息管理逻辑 -| -|-- model # 数据模型模块 -|-- |-- request.py # 请求数据模型定义 -|-- |-- response.py # 响应数据模型定义 -| -|-- scripts # 脚本工具模块 -|-- |-- chat2db_config # 工具配置相关脚本 -|-- |-- |-- 
config.yaml # 工具配置文件模板 -|-- |-- output_example # 输出示例相关脚本 -|-- |-- |-- output_examples.txt # 输出示例文件 -|-- |-- run_chat2db.py # 启动工具进行交互的主脚本 -| -|-- security # 安全模块 -|-- |-- security.py # 安全相关逻辑(如权限校验、加密等) -| -|-- template # 模板及提示词相关模块 -|-- |-- change_txt_to_yaml.py # 将文本提示转换为 YAML 格式的脚本 -|-- |-- prompt.yaml # 提示词模板文件,用于生成 SQL 或问题 -``` -# 3. 主要功能介绍 -## **3.1 智能生成 SQL 查询** -- **功能描述**: - - 工具的核心功能是利用大模型(如 LLM)智能生成符合用户需求的 SQL 查询语句。 - - 用户可以通过自然语言提问,工具会根据问题内容、表结构、示例数据等信息生成对应的 SQL 查询。 -- **实现模块**: - - **路由模块**:`router/sql_generate.py` 负责接收用户请求并调用相关服务。 - - **服务模块**:`service/sql_generate_service.py` 提供 SQL 生成的核心逻辑。 - - **提示词模板**:`template/prompt.yaml` 中定义了生成 SQL 的提示词模板。 - - **数据库适配**:`base/postgres.py` 和 `base/mysql.py` 提供不同数据库的操作封装。 -- **应用场景**: - - 用户无需掌握复杂的 SQL 语法,只需通过自然语言即可完成查询。 - - 支持多种数据库类型(如 PostgreSQL 和 MySQL) - ---- - -## **3.2 关键字检索与多路召回** -- **功能描述**: - - 工具支持基于关键字的检索功能,增强 RAG 的多路召回能力。 - - 对于服务器、硬件型号等特定场景,即使未训练模型,也能通过关键字匹配快速检索相关数据。 -- **实现模块**: - - **路由模块**:`router/keyword.py` 负责处理关键字检索请求。 - - **服务模块**:`service/keyword_service.py` 提供关键字检索的核心逻辑。 - - **AC 自动机**:`base/ac_automation.py` 实现高效的多模式字符串匹配。 -- **应用场景**: - - 在不依赖大模型的情况下,快速检索与关键字相关的 SQL 示例或表信息。 - - 适用于硬件型号、服务器配置等特定场景的快速查询。 - ---- - -## **3.3 数据库表与列信息管理** -- **功能描述**: - - 工具提供对数据库表和列信息的管理功能,包括元数据存储、查询和更新。 - - 用户可以通过工具查看表结构、列注释等信息,并将其用于 SQL 查询生成。 -- **实现模块**: - - **路由模块**:`router/table.py` 负责表信息相关的请求分发。 - - **管理模块**: - - `manager/table_info_manager.py`:管理表信息。 - - `manager/column_info_manager.py`:管理列信息。 - - **数据模型**:`model/request.py` 和 `model/response.py` 定义了表和列信息的数据结构。 -- **应用场景**: - - 用户可以快速了解数据库的表结构,辅助生成更准确的 SQL 查询。 - - 支持动态更新表和列信息,适应本地数据的变化。 - ---- - -## **3.4 SQL 示例管理** -- **功能描述**: - - 工具支持对 SQL 示例的增删改查操作,并结合向量相似度检索最相关的 SQL 示例。 - - 用户可以通过问题向量找到与当前问题最相似的历史 SQL 示例,从而加速查询生成。 -- **实现模块**: - - **路由模块**:`router/sql_example.py` 负责 SQL 示例相关的请求分发。 - - **管理模块**:`manager/sql_example_manager.py` 提供 SQL 示例的管理逻辑。 - - **向量化处理**:`base/vectorize.py` 将问题文本转换为向量表示。 - - **余弦距离排序**:利用 PostgreSQL 的向量计算能力,按余弦距离排序检索最相似的 SQL 
示例。 -- **应用场景**: - - 在生成新 SQL 查询时,参考历史 SQL 示例,提高查询的准确性和效率。 - - 支持对 SQL 示例的灵活管理,便于维护和扩展。 - -# 4. 工具使用 - -## 4.1 服务启动与配置 - -### 服务环境配置 - -- 在common/.env文件中配置数据库连接信息,大模型API密钥等必要参数 - -### 数据库配置 - -```bash -# 进行数据库初始化,例如 -postgres=# CREATE EXTENSION zhparser; -postgres=# CREATE EXTENSIONpostgres=# CREATE EXTENSION vector; -postgres=# CREATE TEXT SEARCH CONFIGURATION zhparser (PARSER = zhparser); -postgres=# ALTER TEXT SEARCH CONFIGURATION zhparser ADD MAPPING FOR n,v,a,i,e,l WITH simple; -postgres=# exit -``` - -### 启动服务 - -```bash -# 读取.env 环境配置,app.py入口启动服务 -python3 chat2db/app/app.py -# 配置run_chat2db.py端口 -python3 chat2db/scripts/run_chat2db.py config --ip xxx --port xxx -``` - ---- - -## 4.2 命令行工具操作指南 - -### 1. 数据库操作 - -#### 添加数据库 -```bash -python3 run_chat2db.py add_db --database_url "postgresql+psycopg2://user:password@localhost:5444/mydb" - -# 成功返回示例 ->> success ->> database_id: 27fa7fd3-949b-41f9-97bc-530f498c0b57 -``` - -#### 删除数据库 - -```bash -python3 run_chat2db.py del_db --database_id mydb_database_id -``` - -#### 查询已配置数据库 - -```bash -python3 run_chat2db.py query_db - -# 返回示例 ----------------------------------------- -查询数据库配置成功 ----------------------------------------- -database_id: 27fa7fd3-949b-41f9-97bc-530f498c0b57 -database_url: postgresql+psycopg2://postgres:123456@0.0.0.0:5444/mydb -created_at: 2025-04-08T01:49:27.544521Z ----------------------------------------- -``` - -#### 查询在数据库中的表 - -```bash -python3 run_chat2db.py list_tb_in_db --database_id mydb_database_id -# 返回示例 ----------------------------------------- -{'database_id': '27fa7fd3-949b-41f9-97bc-530f498c0b57', 'table_filter': None} -查询数据库配置成功 -my_table ----------------------------------------- -# 可过滤表名 -python3 run_chat2db.py list_tb_in_db --database_id mydb_database_id --table_filter my_table -# 返回示例 ----------------------------------------- -{'database_id': '27fa7fd3-949b-41f9-97bc-530f498c0b57', 'table_filter': 'my_table'} -查询数据库配置成功 -my_table ----------------------------------------- -``` 
- ---- - -### 2. 表操作 - -#### 添加数据表 -```bash -python3 run_chat2db.py add_tb --database_id mydb_database_id --table_name users - -# 成功返回示例 ->> 数据表添加成功 ->> table_id: tb_0987654321 -``` - -#### 查询已添加的表 - -```bash -python3 run_chat2db.py query_tb --database_id mydb_database_id -# 返回示例 -查询表格成功 ----------------------------------------- -table_id: 984d1c82-c6d5-4d3d-93d9-8d5bc11254ba -table_name: oe_compatibility_cve_database -table_note: openEuler社区组cve漏洞信息表,存储了cve漏洞的公告时间、id、关联的软件包名称、简介、cvss评分 -created_at: 2025-03-16T12:13:51.920663Z ----------------------------------------- -``` - -#### 删除数据表 - -```bash -python3 run_chat2db.py del_tb --table_id my_table_id -# 返回示例 -删除表格成功 -``` - -#### 查询表的列信息 - -```bash -python run_chat2db.py query_col --table_id my_table_id - -# 返回示例 --------------------------------------------------------- -column_id: 5ef50ebb-310b-48cc-bbc7-cf161c779055 -column_name: id -column_note: None -column_type: bigint -enable: False --------------------------------------------------------- -column_id: 69cf3c00-8e3c-4b99-83a5-6942278a60f3 -column_name: architecture -column_note: openEuler支持的板卡信息的支持架构 -column_type: character varying -enable: False --------------------------------------------------------- -``` - -#### 启用禁用指定列 - -```bash -python3 run_chat2db.py enable_col --column_id my_column_id --enable False -# 返回示例 -列关键字功能开启/关闭成功 -``` - ---- - -### 3. SQL示例操作 - -#### 生成SQL示例 - -```bash -python3 run_chat2db.py add_sql_exp --table_id "your_table_id" --question "查询所有用户" --sql "SELECT * FROM users" -# 返回示例 -success -sql_example_id: 4282bce7-f2fd-42b0-a63b-7afd53d9e704 -``` - -#### 批量添加SQL示例 - -1. 创建Excel文件(示例格式): - - | question | sql | - |----------|----------------------------------------------| - | 查询所有用户 | SELECT * FROM users | - | 统计北京地区用户 | SELECT COUNT(*) FROM users WHERE region='北京' | - -2. 
执行导入命令: - -```bash -python3 run_chat2db.py add_sql_exp --table_id "your_table_id" --dir "path/to/examples.xlsx" -# 成功返回示例 ->> 成功添加示例:查询所有用户 ->> sql_example_id: exp_556677 ->> 成功添加示例:统计北京地区用户 ->> sql_example_id: exp_778899 -``` - ---- - -#### 删除SQL示例 - -```bash -python3 run_chat2db.py del_sql_exp --sql_example_id "your_example_id" -# 返回示例 -sql案例删除成功 -``` - -#### 查询指定表的SQL示例 - -```bash -python3 run_chat2db.py query_sql_exp --table_id "your_table_id" -# 返回示例 -查询SQL案例成功 --------------------------------------------------------- -sql_example_id: 5ab552db-b122-4653-bfdc-085c0b8557d6 -question: 查询所有用户 -sql: SELECT * FROM users --------------------------------------------------------- -``` - -#### 更新SQL示例 - -```bash -python3 run_chat2db.py update_sql_exp --sql_example_id "your_example_id" --question "新问题" --sql "新SQL语句" -# 返回示例 -sql案例更新成功 -``` - -#### 生成指定数据表SQL示例 - -```bash -python run_chat2db.py generate_sql_exp --table_id "your_table_id" --generate_cnt 5 --sql_var True --dir "output.xlsx" -# --generate_cnt 参数: 生成sql对的数量 ;--sql_var: 是否验证生成的sql对,True为验证,False不验证 -# 返回示例 -sql案例生成成功 -Data written to Excel file successfully. -``` - -### 4. 智能查询 - -#### 通过自然语言生成SQL(需配合前端或API调用) - -```python -# 示例API请求 -import requests - -url = "http://localhost:8000/sql/generate" -payload = { - "question": "显示最近7天注册的用户", - "table_id": "tb_0987654321" -} - -response = requests.post(url, json=payload) -print(response.json()) - -# 返回示例 -{ - "sql": "SELECT * FROM users WHERE registration_date >= CURRENT_DATE - INTERVAL '7 days'", - "confidence": 0.92 -} -``` - ---- - -5. **执行智能查询** -```http -POST /sql/generate -Content-Type: application/json - -{ - "question": "找出过去一个月销售额超过1万元的商品", - "table_id": "tb_yyyy" -} -``` - - - - - - - diff --git a/chat2db/llm/chat_with_model.py b/chat2db/llm/chat_with_model.py deleted file mode 100644 index 9cc1ad2..0000000 --- a/chat2db/llm/chat_with_model.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. 
-from langchain_openai import ChatOpenAI -from langchain.schema import SystemMessage, HumanMessage -import re - -class LLM: - def __init__(self, model_name, openai_api_base, openai_api_key, request_timeout, max_tokens, temperature): - self.client = ChatOpenAI(model_name=model_name, - openai_api_base=openai_api_base, - openai_api_key=openai_api_key, - request_timeout=request_timeout, - max_tokens=max_tokens, - temperature=temperature) - - def assemble_chat(self, system_call, user_call): - chat = [] - chat.append(SystemMessage(content=system_call)) - chat.append(HumanMessage(content=user_call)) - return chat - - async def chat_with_model(self, system_call, user_call): - chat = self.assemble_chat(system_call, user_call) - response = await self.client.ainvoke(chat) - content = re.sub(r'.*?\n\n', '', response.content, flags=re.DOTALL) - return content diff --git a/chat2db/manager/column_info_manager.py b/chat2db/manager/column_info_manager.py deleted file mode 100644 index 789f499..0000000 --- a/chat2db/manager/column_info_manager.py +++ /dev/null @@ -1,69 +0,0 @@ -from sqlalchemy import and_ -import sys -from chat2db.database.postgres import ColumnInfo, PostgresDB - - -class ColumnInfoManager(): - @staticmethod - async def add_column_info_with_table_id(table_id, column_name, column_type, column_note): - column_info_entry = ColumnInfo(table_id=table_id, column_name=column_name, - column_type=column_type, column_note=column_note) - with PostgresDB.get_session() as session: - session.add(column_info_entry) - session.commit() - - @staticmethod - async def del_column_info_by_column_id(column_id): - with PostgresDB.get_session() as session: - column_info_to_delete = session.query(ColumnInfo).filter(ColumnInfo.id == column_id) - session.delete(column_info_to_delete) - session.commit() - - @staticmethod - async def get_column_info_by_column_id(column_id): - tmp_dict = {} - with PostgresDB.get_session() as session: - result = session.query(ColumnInfo).filter(ColumnInfo.id == 
column_id).first() - session.commit() - if not result: - return None - tmp_dict = { - 'column_id': result.id, - 'table_id': result.table_id, - 'column_name': result.column_name, - 'column_type': result.column_type, - 'column_note': result.column_note, - 'enable': result.enable - } - return tmp_dict - - @staticmethod - async def update_column_info_enable(column_id, enable=True): - with PostgresDB.get_session() as session: - column_info = session.query(ColumnInfo).filter(ColumnInfo.id == column_id).first() - if column_info is not None: - column_info.enable = True - session.commit() - else: - return False - return True - - @staticmethod - async def get_column_info_by_table_id(table_id, enable=None): - column_info_list = [] - with PostgresDB.get_session() as session: - if enable is None: - results = session.query(ColumnInfo).filter(ColumnInfo.table_id == table_id).all() - else: - results = session.query(ColumnInfo).filter( - and_(ColumnInfo.table_id == table_id, ColumnInfo.enable == enable)).all() - for result in results: - tmp_dict = { - 'column_id': result.id, - 'column_name': result.column_name, - 'column_type': result.column_type, - 'column_note': result.column_note, - 'enable': result.enable - } - column_info_list.append(tmp_dict) - return column_info_list diff --git a/chat2db/manager/database_info_manager.py b/chat2db/manager/database_info_manager.py deleted file mode 100644 index cc234fb..0000000 --- a/chat2db/manager/database_info_manager.py +++ /dev/null @@ -1,98 +0,0 @@ -import json -import hashlib -import sys -import logging -from chat2db.database.postgres import DatabaseInfo, PostgresDB -from chat2db.security.security import Security - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') - - -class DatabaseInfoManager(): - @staticmethod - async def add_database(database_url: str): - id = None - with PostgresDB.get_session() as session: - encrypted_database_url, 
encrypted_config = Security.encrypt(database_url) - hashmac = hashlib.sha256(database_url.encode('utf-8')).hexdigest() - counter = session.query(DatabaseInfo).filter(DatabaseInfo.hashmac == hashmac).first() - if counter: - return id - encrypted_config = json.dumps(encrypted_config) - database_info_entry = DatabaseInfo(encrypted_database_url=encrypted_database_url, - encrypted_config=encrypted_config, hashmac=hashmac) - session.add(database_info_entry) - session.commit() - id = database_info_entry.id - return id - - @staticmethod - async def del_database_by_id(id): - with PostgresDB.get_session() as session: - database_info_to_delete = session.query(DatabaseInfo).filter(DatabaseInfo.id == id).first() - if database_info_to_delete: - session.delete(database_info_to_delete) - else: - return False - session.commit() - return True - - @staticmethod - async def del_database_by_url(database_url): - with PostgresDB.get_session() as session: - hashmac = hashlib.sha256(database_url.encode('utf-8')).hexdigest() - database_info_entry = session.query(DatabaseInfo).filter(DatabaseInfo.hashmac == hashmac).first() - if database_info_entry: - database_info_to_delete = session.query(DatabaseInfo).filter(DatabaseInfo.id == database_info_entry.id).first() - if database_info_to_delete: - session.delete(database_info_to_delete) - else: - return False - else: - return False - session.commit() - return True - - @staticmethod - async def get_database_url_by_id(id): - with PostgresDB.get_session() as session: - result = session.query( - DatabaseInfo.encrypted_database_url, DatabaseInfo.encrypted_config).filter( - DatabaseInfo.id == id).first() - if result is None: - return None - try: - encrypted_database_url, encrypted_config = result - encrypted_config = json.loads(encrypted_config) - except Exception as e: - logging.error(f'数据库url解密失败由于{e}') - return None - if encrypted_database_url: - database_url = Security.decrypt(encrypted_database_url, encrypted_config) - else: - return None - return 
database_url - @staticmethod - async def get_database_id_by_url(database_url: str): - with PostgresDB.get_session() as session: - hashmac = hashlib.sha256(database_url.encode('utf-8')).hexdigest() - database_info_entry = session.query(DatabaseInfo).filter(DatabaseInfo.hashmac == hashmac).first() - if database_info_entry: - return database_info_entry.id - return None - @staticmethod - async def get_all_database_info(): - with PostgresDB.get_session() as session: - results = session.query(DatabaseInfo).order_by(DatabaseInfo.created_at).all() - database_info_list = [] - for i in range(len(results)): - database_id = results[i].id - encrypted_database_url = results[i].encrypted_database_url - encrypted_config = json.loads(results[i].encrypted_config) - created_at = results[i].created_at - if encrypted_database_url: - database_url = Security.decrypt(encrypted_database_url, encrypted_config) - tmp_dict = {'database_id': database_id, 'database_url': database_url, 'created_at': created_at} - database_info_list.append(tmp_dict) - return database_info_list diff --git a/chat2db/manager/sql_example_manager.py b/chat2db/manager/sql_example_manager.py deleted file mode 100644 index 67ccbca..0000000 --- a/chat2db/manager/sql_example_manager.py +++ /dev/null @@ -1,76 +0,0 @@ -import json -from sqlalchemy import and_ -import sys -from chat2db.database.postgres import SqlExample, PostgresDB -from chat2db.security.security import Security - - -class SqlExampleManager(): - @staticmethod - async def add_sql_example(question, sql, table_id, question_vector): - id = None - sql_example_entry = SqlExample(question=question, sql=sql, - table_id=table_id, question_vector=question_vector) - with PostgresDB.get_session() as session: - session.add(sql_example_entry) - session.commit() - id = sql_example_entry.id - return id - - @staticmethod - async def del_sql_example_by_id(id): - with PostgresDB.get_session() as session: - sql_example_to_delete = session.query(SqlExample).filter(SqlExample.id 
== id).first() - if sql_example_to_delete: - session.delete(sql_example_to_delete) - else: - return False - session.commit() - return True - - @staticmethod - async def update_sql_example_by_id(id, question, sql, question_vector): - with PostgresDB.get_session() as session: - sql_example_to_update = session.query(SqlExample).filter(SqlExample.id == id).first() - if sql_example_to_update: - sql_example_to_update.sql = sql - sql_example_to_update.question = question - sql_example_to_update.question_vector = question_vector - session.commit() - else: - return False - return True - - @staticmethod - async def query_sql_example_by_table_id(table_id): - with PostgresDB.get_session() as session: - results = session.query(SqlExample).filter(SqlExample.table_id == table_id).all() - sql_example_list = [] - for result in results: - tmp_dict = { - 'sql_example_id': result.id, - 'question': result.question, - 'sql': result.sql - } - sql_example_list.append(tmp_dict) - return sql_example_list - - @staticmethod - async def get_topk_sql_example_by_cos_dis(question_vector, table_id_list=None, topk=3): - with PostgresDB.get_session() as session: - if table_id_list is not None: - sql_example_list = session.query( - SqlExample - ).filter(SqlExample.table_id.in_(table_id_list)).order_by( - SqlExample.question_vector.cosine_distance(question_vector) - ).limit(topk).all() - else: - sql_example_list = session.query( - SqlExample - ).order_by( - SqlExample.question_vector.cosine_distance(question_vector) - ).limit(topk).all() - sql_example_list = [ - {'table_id': sql_example.table_id, 'question': sql_example.question, 'sql': sql_example.sql} - for sql_example in sql_example_list] - return sql_example_list diff --git a/chat2db/manager/table_info_manager.py b/chat2db/manager/table_info_manager.py deleted file mode 100644 index fcf4f66..0000000 --- a/chat2db/manager/table_info_manager.py +++ /dev/null @@ -1,87 +0,0 @@ -from sqlalchemy import and_ -import sys -from chat2db.database.postgres 
import TableInfo, PostgresDB - - -class TableInfoManager(): - @staticmethod - async def add_table_info(database_id, table_name, table_note, table_note_vector): - id = None - with PostgresDB.get_session() as session: - counter = session.query(TableInfo).filter( - and_(TableInfo.database_id == database_id, TableInfo.table_name == table_name)).first() - if counter: - return id - table_info_entry = TableInfo(database_id=database_id, table_name=table_name, - table_note=table_note, table_note_vector=table_note_vector) - session.add(table_info_entry) - session.commit() - id = table_info_entry.id - return id - - @staticmethod - async def del_table_by_id(id): - with PostgresDB.get_session() as session: - table_info_to_delete = session.query(TableInfo).filter(TableInfo.id == id).first() - if table_info_to_delete: - session.delete(table_info_to_delete) - else: - return False - session.commit() - return True - - @staticmethod - async def get_table_info_by_table_id(table_id): - with PostgresDB.get_session() as session: - table_id, database_id, table_name, table_note = session.query( - TableInfo.id, TableInfo.database_id, TableInfo.table_name, TableInfo.table_note).filter( - TableInfo.id == table_id).first() - if table_id is None: - return None - return { - 'table_id': table_id, - 'database_id': database_id, - 'table_name': table_name, - 'table_note': table_note - } - - @staticmethod - async def get_table_id_by_database_id_and_table_name(database_id, table_name): - with PostgresDB.get_session() as session: - table_info_entry = session.query( - TableInfo).filter( - TableInfo.database_id == database_id, - TableInfo.table_name == table_name, - ).first() - if table_info_entry: - return table_info_entry.id - return None - - @staticmethod - async def get_table_info_by_database_id(database_id, enable=None): - with PostgresDB.get_session() as session: - if enable is None: - results = session.query( - TableInfo).filter(TableInfo.database_id == database_id).all() - else: - results = 
session.query( - TableInfo).filter( - and_(TableInfo.database_id == database_id, - TableInfo.enable == enable - )).all() - table_info_list = [] - for result in results: - table_info_list.append({'table_id': result.id, 'table_name': result.table_name, - 'table_note': result.table_note, 'created_at': result.created_at}) - return table_info_list - - @staticmethod - async def get_topk_table_by_cos_dis(database_id, tmp_vector, topk=3): - with PostgresDB.get_session() as session: - results = session.query( - TableInfo.id - ).filter(TableInfo.database_id == database_id).order_by( - TableInfo.table_note_vector.cosine_distance(tmp_vector) - ).limit(topk).all() - table_id_list = [result[0] for result in results] - return table_id_list diff --git a/chat2db/model/request.py b/chat2db/model/request.py deleted file mode 100644 index 6d8c955..0000000 --- a/chat2db/model/request.py +++ /dev/null @@ -1,89 +0,0 @@ -import uuid -from pydantic import BaseModel, Field -from typing import Optional - -class QueryRequest(BaseModel): - question: str - topk_sql: int = 5 - topk_answer: int = 15 - use_llm_enhancements: bool = False - - -class DatabaseAddRequest(BaseModel): - database_url: str - - -class DatabaseDelRequest(BaseModel): - database_id: Optional[uuid.UUID] = Field(default=None, description="数据库id") - database_url: Optional[str] = Field(default=None, description="数据库url") - -class DatabaseSqlGenerateRequest(BaseModel): - database_url: str - table_name_list: Optional[list[str]] = Field(default=[]) - question: str - topk: int = 5 - use_llm_enhancements: Optional[bool] = Field(default=False) - -class TableAddRequest(BaseModel): - database_id: uuid.UUID - table_name: str - - -class TableDelRequest(BaseModel): - table_id: uuid.UUID - - -class TableQueryRequest(BaseModel): - database_id: uuid.UUID - - -class EnableColumnRequest(BaseModel): - column_id: uuid.UUID - enable: bool - - -class SqlExampleAddRequest(BaseModel): - table_id: uuid.UUID - question: str - sql: str - - -class 
SqlExampleDelRequest(BaseModel): - sql_example_id: uuid.UUID - - -class SqlExampleQueryRequest(BaseModel): - table_id: uuid.UUID - - -class SqlExampleUpdateRequest(BaseModel): - sql_example_id: uuid.UUID - question: str - sql: str - - -class SqlGenerateRequest(BaseModel): - database_id: uuid.UUID - table_id_list: list[uuid.UUID] = [] - question: str - topk: int = 5 - use_llm_enhancements: bool = True - - -class SqlRepairRequest(BaseModel): - database_id: uuid.UUID - table_id: uuid.UUID - sql: str - message: str = Field(..., max_length=2048) - question: str - - -class SqlExcuteRequest(BaseModel): - database_id: uuid.UUID - sql: str - - -class SqlExampleGenerateRequest(BaseModel): - table_id: uuid.UUID - generate_cnt: int = 1 - sql_var: bool = False diff --git a/chat2db/model/response.py b/chat2db/model/response.py deleted file mode 100644 index fd7c2e7..0000000 --- a/chat2db/model/response.py +++ /dev/null @@ -1,6 +0,0 @@ -from pydantic import BaseModel -from typing import Any -class ResponseData(BaseModel): - code: int - message: str - result: Any \ No newline at end of file diff --git a/chat2db/scripts/chat2db_config/config.yaml b/chat2db/scripts/chat2db_config/config.yaml deleted file mode 100644 index 78e3719..0000000 --- a/chat2db/scripts/chat2db_config/config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -UVICORN_IP: 0.0.0.0 -UVICORN_PORT: '9015' diff --git a/chat2db/scripts/docs/output_examples.xlsx b/chat2db/scripts/docs/output_examples.xlsx deleted file mode 100644 index 599501ca6c0f1d2b88fe5235d40fb11a56fbf005..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6080 zcmZ`-cQ~8<8V+jDsMypfwfCsnv$n)6s%k_8i9KqIQDRn6YHw<^MNxayZqZV^R@I)R zsCm+U=bWC;^_{#|-e2-w*M0wSKjVF#8?1$MjS2t&5MaLY04ocPJ9=0EK>RfT;5z1s zxw13T1B&#pG<@U&b+-`kK{!<;4In#&C{&Nm(Y@0=!X6MKP>AY{0qW8Nocn%Ce2mlI zNvJMbp6-IpAvq|Wa@*}0_ntTzT7HIS{4BYoEmY@uw?uaD&~6kL`ee(jc(W<%&vMbo zBw6EfsdbjM+k&t}k%#xt&$(u`q8cL9tblAjm4>Y1=wopmsG)CB5sR~Lvw3Ogt=%Mp 
z4c3UzyCtJDki(j#p$!&7M)al?zmsVpSolsE$kjri>8EO+WEe_XnsiKJQJgqzM$4Wz z2uC8|Ws_(heQ!6<$7qLp96G~asn|5fXdYV}z9TV$iofbq zjHV{%RzGL=-fcO$KrHaz^#7i%F|~%# zT?-okAjX`QZcrz8L4jZQio|x^P9YM79e>rO)8|Nag51JJmVz&wVsdRakqmEQ^WTMT z??fdiki^3A<6JjaU;`T;WMbE2^$!_I_{!+1a0dn-54H_lz+K6Ci18k?ZJ0SB7G=lmxHSN=@!3?%d(`rw?$#YHGEe zH0lMk(i>wt6OMzTOAYF5kA3v#Q@=(h*)oznM^k)U$>uCjbWbTwhNL$mAp-s1HHzSP zqb6zAj-Y~y3Wd$?$jd%5oe?^J(q%`5AxWMkZ*@Pn$P8sqhbN%>e2K-yJZ?O_(?Zcw zR=0;1Z}e(peP|>ZkfMw1krOnq)gy2R+5kk@M~^t%2Qzw z;GNA4%Loi9lI&9TQu&c<4p8P(SkI{KCUNZcm{c12L4u!PTnhOyGhryMO-|$^{`@44 z1CmsgMXaG0_U42fXTvPXCHh+nK3taqmr2sg{|j1igsm*>^GskQ>P~UGPqPj;Z`JGl z#^C%zYwP1}+&f&PhF+IR^|4zN@)^euwVqU0$rmfTY1Df!tVI^@%Y|FNc9DP;v& zH?zxT;B(U?p1-5J?Ql?Q{?GP2<#QY{xex|tkNmKyyJUU-UoK)XnC{4Gdg<9_= z`q7T{JgJP^XM$1<{5RsN)gX@bgP87#c_B5gPfYZo=gwnOFR%4O59=LO_S5UfeJ?V) zB>x`1P+A8Lbxim;G3ibICw%`&>{!U}s5R|_#%AHn4(w*)rSG&1m4a2^L!7f>#!S*x zOrI`fHw?L5zSf9-{qk|p_w5}CbU?1>r&Y#84H6Phc-ZJEqnt$ixMW+#ikS;kY}rK# zziiQAlELp-Y`M+;K_jEX&a^7k+p6w}#wa*sL<{QFDGO+tbx!~IMK#uQ(d7j$e%QG7+5b}*QomIQNepM@qctCAVy z74^uYwx*_aUntn^k>o8>e*Yj60@_KM8+OQ-&g9K3f>Ym79yhk$35>vo_GFHB97q)y zG;G3c`$+D|&u3D#aI!|P&}%zr zRl^ig7c1Y;4oFr7;$MpiE-^%f2rIS_*IFFCGuidXgfcLp{HA1&GWJj1b&;}}U!AC8 zfr_?1gVE;ZGqk|Cm0YST!c;<(TQi59$-Cx_G6F&0FQ7Y?>&*RZ zxGqt1A-v@rBJby%tcYrmC>CHYbryNyUrt>`@e%gT`lPTysC1j@LRUNMmhna(Pq7q- zCHOlbN2{Z7c)zvRT`eXo@nAz5Xu$d?rL9(Ns+{qW@r!ypF{v`7PTbrUK}f`?xva1O z@R~}JrMnBvt93Jdy43brBiBzUbuKIy0Si+2i)#-9_6Lldb00OUJdG5%4E7RY2!7q4 zf!L<@ZmgE4b-`nzIXV2U!VVg%XOUq9BjYdD%0Bu$$@Js1zw4Ip1gYw6?LU0Fu@S^RQB>v?QA0zr#Yy66Tw$CMST?1VP9@FnNG2L zblMnNQY=WZ$q***lUf|+68Ww3COREh#IDUJrdBT`_;6JEz#g!7Q)S;NCNb5DvtRu> zD?QP0_2Uax`HpTSVanXXdIh2Hc}4wb@h`3XE{Nhw1&L<`tS+e`E1~&?W)DX}3dbQp z1so73nc)#JF^NnJYn-))RCBSvTBu8{mL%TYnD%gl$V4k~vl zUAwHN^hXvxQ7x^~Z@#v|qa!`s#KS31Q>Z@YoZJR6mdS|6nWWMw!r6>l&KN|#*_fZ# z=gk!v*gV;iq%h@l4fSjyMaI4rI7XdZTzN0UE5m3{F^x+W7XTprcMW+wa)SOYo@T?j z=nNr>z#U8A_rMjN1N&&-10v$^S}G)QKrFp@!i~h3=vm7-jl~zl-h4C4mhay$r90cT zyjzNYnz;17;<_4*ShGC1!xKTD+lac5V%m{|EC#PdXyiXqgl(_d{ABeO(z=*&C%Z;h 
z!)s42lD8{`%rEWvWB6|pAd(O=aoJ*xj2FXo`a4a!)rjdumQD;z2-MgV}9e!Mq zcnRUW z$$JiGtn1~gzO5)a%2vXMRP?bAGe2tk_G@*^Jq;TGgI7DP)V3xF?4Ahh<%FMeO5`XJ zOXfzt^>}oh!RqBu6)hL!L!tkncF?p@G1bqalvM~5$lsX=-5M$17ag@IU{??ow=!3) zy3K?*6fhHc!+U8yxyCTLmZCEu)cezZ(f^|#QQL=09@Sz{t9qM&z*;OwFm-*$Cl(7? z8l!|IJN&QD-D;7w$Mi6R^#EFOYdG)#;V z@)!6SI5NgQppb`pvyqU12awjT$u`u~=8X&8BJ$kOtp(o8EAuSviJIXnOor$){IySD zI<1F26aoF~{(CUlGS!KnR-tX#=T&Q|i@PrUq4If!#AHGER&Cjrnv$L<+1JH%e&uO> zNO$EeAR!@yg61ZYkpf(rT%TlF4ZfzjZaM$sZsML@)rZ<>z#FNXS$q3|LB~uh>u>Od zmG7#k_dl)ucE8dz@}?ZQJ!Nf$bm&E4N_atsMlhY8`t);3xW)9zJkP8lFp3cSy9#o# z7Q~&5oT@#rVi*{Z;UJu0a0S}nY5}K0+Ird~?7W(d9jAngqQWT|amJ_ffjcvjARv@ z7KI$9T@(@LCdjQkw%ZX{cd@f>RgQb1&PDKEn~$)pe(z2e7bAmsxA4dl$=7IYO=jJ2 zOV@fsinRoxIA5$MpH$qa8AAA=BEgB&wXLt;Wv);q^JV$Yu#m_tbuSoOf2GA*tFX?XLwAglWZ+y3 zY!~4pmZrJrLHr>6^oGPNlZ?h8;nK-z%W2C_OLTy@5&z3+1P$rf*vevtWs{paGo+jB zbJCM^5$6w-Nn|Il(gfY|erji_xtOBM=9NPB9dFj3q`p`c>fnxabtBB>Gkr{1c)vrV z&8$yaURPg8Zf@TWrl4T+rYF?8F*loZ#p$~dm#1QEOnXBIy~(U@ssUg;Y1zjPKAZN* z=|ei0lmLbwRBS4!qTKyZ4fTEpH`Mf=fQ*Vr2Nt)@5-q7Ky<^Rh(SFcrl{3#YVh+P z;>xsUZ6v%D`>_dXXwBu0B^K-Q^VaDTq3Iq|OSn<;kc5I3Dm7JG4+cIKNKzZ!A*N+? 
zGtHqvRrMMw@%hbVtZBwk2b9f2#NG<&$2ba=o)5p`XIIO`CLl z0e3%aZwlhf+X$-HUEuwFlx{<=zv94*0btDj0>q2~w$3(SH)j`jK^tc`=}0$ZEiv1K(ww6_ zmCER$8fZ4KwtMareL(rMJ$7_PF?vLox8x&$jQ@Efny#ZJT(XSxo?x3QRso|`k(%@l zY5T-$CiV5id9dP)18*sLA>wuJW_oS#zF0sTrOdZc1Ajob9b4Y@#A-r#ppzu_?atwl zYu}g#6G<$an9Dt?N%P&lC3TSF*;T+?-F7llV*=)G1qhodUyuYg|i@M&ITg zpOocx(@#q8>`lKQ24_W_rTgG9y3YlomdXFF>4S#i5pRs96qpr-;(s-@a&h@f(stdA z->baplB}mb>w%$^T1oJ|U6n%WdknBo_iopIqsvQq*`x6}0h_m~LDbzdK%};Q>W#z( zOWt0kZ5JPm?)Db%*IEw0shP}68@fcdW4zQf>6&8#S+11vVWHa?Eqe5T4&)EW86 z9HZM>HqSWU#8>1QcJm*3BE)8V&Sz2Vazp!CWYX9sik{5f3SR2>Jr2f)`!}-4vn5Xx zz8BP!wbNH{ofn)rdmOAf_9MON%jZXO)fF9wcRmh3SVrk-y&miD5WyWVGriW{O25c# z*jk+|0xVp${}{q~LsaYr+|fOynJ0|7*ftZbCS{F3ol9Fost##6K8_#-7T`~aUvICM z^e18n$|s21kU^k5^Vtm7@VY(YNnNw+^R~_h)ZX$N-W?iJ4Ej0r7+)azC!gM&%Suny zyrr{gAU0SF3!4h(|9hpFNd5iRp3=m><=&z zQ-=TFA-l?R)jRsbQ-w)A49`D0Nmp5}S{Q#=Trrd1zghlcXj}zfUB&-^m+}62F~7=i zbrJc)z(e#G!++P3tI(^{=^rSV&Sr{rdMW>^p6Q diff --git a/chat2db/scripts/run_chat2db.py b/chat2db/scripts/run_chat2db.py deleted file mode 100644 index 5da9e48..0000000 --- a/chat2db/scripts/run_chat2db.py +++ /dev/null @@ -1,436 +0,0 @@ -import argparse -import os -import pandas as pd -import requests -import yaml -from fastapi import FastAPI -import shutil - -terminal_width = shutil.get_terminal_size().columns -app = FastAPI() - -CHAT2DB_CONFIG_PATH = './chat2db_config' -CONFIG_YAML_PATH = './chat2db_config/config.yaml' -DEFAULT_CHAT2DB_CONFIG = { - "UVICORN_IP": "127.0.0.1", - "UVICORN_PORT": "8000" -} - - -# 修改 -def update_config(uvicorn_ip, uvicorn_port): - try: - yml = {'UVICORN_IP': uvicorn_ip, 'UVICORN_PORT': uvicorn_port} - with open(CONFIG_YAML_PATH, 'w') as file: - yaml.dump(yml, file) - return {"message": "修改成功"} - except Exception as e: - return {"message": f"修改失败,由于:{e}"} - - -# 增加数据库 -def call_add_database_info(database_url): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/database/add" - request_body = { - "database_url": database_url - } - response = 
requests.post(url, json=request_body) - return response.json() - - -# 删除数据库 -def call_del_database_info(database_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/database/del" - request_body = { - "database_id": database_id - } - response = requests.post(url, json=request_body) - return response.json() - - -# 查询数据库配置 -def call_query_database_info(): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/database/query" - response = requests.get(url) - return response.json() - - -# 查询数据库内表格配置 -def call_list_table_in_database(database_id, table_filter=''): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/database/list" - params = { - "database_id": database_id, - "table_filter": table_filter - } - print(params) - response = requests.get(url, params=params) - return response.json() - - -# 增加数据表 -def call_add_table_info(database_id, table_name): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/table/add" - request_body = { - "database_id": database_id, - "table_name": table_name - } - response = requests.post(url, json=request_body) - return response.json() - - -# 删除数据表 -def call_del_table_info(table_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/table/del" - request_body = { - "table_id": table_id - } - response = requests.post(url, json=request_body) - return response.json() - - -# 查询数据表配置 -def call_query_table_info(database_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/table/query" - params = { - "database_id": database_id - } - response = requests.get(url, params=params) - return response.json() - - -# 查询数据表列信息 -def call_query_column(table_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/table/column/query" - params = { - "table_id": table_id - } - response = requests.get(url, params=params) - return response.json() - - -# 启用禁用列 -def call_enable_column(column_id, enable): - url = 
f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/table/column/enable" - request_body = { - "column_id": column_id, - "enable": enable - } - response = requests.post(url, json=request_body) - return response.json() - - -# 增加sql_example案例 -def call_add_sql_example(table_id, question, sql): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/sql/example/add" - request_body = { - "table_id": table_id, - "question": question, - "sql": sql - } - response = requests.post(url, json=request_body) - return response.json() - - -# 删除sql_example案例 -def call_del_sql_example(sql_example_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/sql/example/del" - request_body = { - "sql_example_id": sql_example_id - } - response = requests.post(url, json=request_body) - return response.json() - - -# 查询sql_example案例 -def call_query_sql_example(table_id): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/sql/example/query" - params = { - "table_id": table_id - } - response = requests.get(url, params=params) - return response.json() - - -# 更新sql_example案例 -def call_update_sql_example(sql_example_id, question, sql): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/sql/example/update" - request_body = { - "sql_example_id": sql_example_id, - "question": question, - "sql": sql - } - response = requests.post(url, json=request_body) - return response.json() - - -# 生成sql_example案例 -def call_generate_sql_example(table_id, generate_cnt=1, sql_var=False): - url = f"http://{config['UVICORN_IP']}:{config['UVICORN_PORT']}/sql/example/generate" - response_body = { - "table_id": table_id, - "generate_cnt": generate_cnt, - "sql_var": sql_var - } - response = requests.post(url, json=response_body) - return response.json() - - -def write_sql_example_to_excel(dir, sql_example_list): - try: - if not os.path.exists(os.path.dirname(dir)): - os.makedirs(os.path.dirname(dir)) - data = { - 'question': [], - 'sql': [] - } - for sql_example in 
sql_example_list: - data['question'].append(sql_example['question']) - data['sql'].append(sql_example['sql']) - - df = pd.DataFrame(data) - df.to_excel(dir, index=False) - - print("Data written to Excel file successfully.") - except Exception as e: - print("Error writing data to Excel file:", str(e)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="chat2DB脚本") - subparsers = parser.add_subparsers(dest="command", help="子命令列表") - - # 修改config.yaml - parser_config = subparsers.add_parser("config", help="修改config.yaml") - parser_config.add_argument("--ip", type=str, required=True, help="uvicorn ip") - parser_config.add_argument("--port", type=str, required=True, help="uvicorn port") - - # 增加数据库 - parser_add_database = subparsers.add_parser("add_db", help="增加指定数据库") - parser_add_database.add_argument("--database_url", type=str, required=True, - help="数据库连接地址,如postgresql+psycopg2://postgres:123456@0.0.0.0:5432/postgres") - - # 删除数据库 - parser_del_database = subparsers.add_parser("del_db", help="删除指定数据库") - parser_del_database.add_argument("--database_id", type=str, required=True, help="数据库id") - - # 查询数据库配置 - parser_query_database = subparsers.add_parser("query_db", help="查询指定数据库配置") - - # 查询数据库内表格配置 - parser_list_table_in_database = subparsers.add_parser("list_tb_in_db", help="查询数据库内表格配置") - parser_list_table_in_database.add_argument("--database_id", type=str, required=True, help="数据库id") - parser_list_table_in_database.add_argument("--table_filter", type=str, required=False, help="表格名称过滤条件") - - # 增加数据表 - parser_add_table = subparsers.add_parser("add_tb", help="增加指定数据库内的数据表") - parser_add_table.add_argument("--database_id", type=str, required=True, help="数据库id") - parser_add_table.add_argument("--table_name", type=str, required=True, help="数据表名称") - - # 删除数据表 - parser_del_table = subparsers.add_parser("del_tb", help="删除指定数据表") - parser_del_table.add_argument("--table_id", type=str, required=True, help="数据表id") - - # 查询数据表配置 - 
parser_query_table = subparsers.add_parser("query_tb", help="查询指定数据表配置") - parser_query_table.add_argument("--database_id", type=str, required=True, help="数据库id") - - # 查询数据表列信息 - parser_query_column = subparsers.add_parser("query_col", help="查询指定数据表详细列信息") - parser_query_column.add_argument("--table_id", type=str, required=True, help="数据表id") - - # 启用禁用列 - parser_enable_column = subparsers.add_parser("enable_col", help="启用禁用指定列") - parser_enable_column.add_argument("--column_id", type=str, required=True, help="列id") - parser_enable_column.add_argument("--enable", type=bool, required=True, help="是否启用") - - # 增加sql案例 - parser_add_sql_example = subparsers.add_parser("add_sql_exp", help="增加指定数据表sql案例") - parser_add_sql_example.add_argument("--table_id", type=str, required=True, help="数据表id") - parser_add_sql_example.add_argument("--question", type=str, required=False, help="问题") - parser_add_sql_example.add_argument("--sql", type=str, required=False, help="sql") - parser_add_sql_example.add_argument("--dir", type=str, required=False, help="输入路径") - - # 删除sql_exp - parser_del_sql_example = subparsers.add_parser("del_sql_exp", help="删除指定sql案例") - parser_del_sql_example.add_argument("--sql_example_id", type=str, required=True, help="sql案例id") - - # 查询sql案例 - parser_query_sql_example = subparsers.add_parser("query_sql_exp", help="查询指定数据表sql对案例") - parser_query_sql_example.add_argument("--table_id", type=str, required=True, help="数据表id") - - # 更新sql案例 - parser_update_sql_example = subparsers.add_parser("update_sql_exp", help="更新sql对案例") - parser_update_sql_example.add_argument("--sql_example_id", type=str, required=True, help="sql案例id") - parser_update_sql_example.add_argument("--question", type=str, required=True, help="sql语句对应的问题") - parser_update_sql_example.add_argument("--sql", type=str, required=True, help="sql语句") - - # 生成sql案例 - parser_generate_sql_example = subparsers.add_parser("generate_sql_exp", help="生成指定数据表sql对案例") - 
parser_generate_sql_example.add_argument("--table_id", type=str, required=True, help="数据表id") - parser_generate_sql_example.add_argument("--generate_cnt", type=int, required=False, help="生成sql对数量", - default=1) - parser_generate_sql_example.add_argument("--sql_var", type=bool, required=False, - help="是否验证生成的sql对,True为验证,False不验证", - default=False) - parser_generate_sql_example.add_argument("--dir", type=str, required=False, help="生成的sql对输出路径", - default="templetes/output_examples.xlsx") - - args = parser.parse_args() - - if os.path.exists(CONFIG_YAML_PATH): - exist = True - with open(CONFIG_YAML_PATH, 'r') as file: - yml = yaml.safe_load(file) - config = { - 'UVICORN_IP': yml.get('UVICORN_IP'), - 'UVICORN_PORT': yml.get('UVICORN_PORT'), - } - else: - exist = False - - if args.command == "config": - if not exist: - os.makedirs(CHAT2DB_CONFIG_PATH, exist_ok=True) - with open(CONFIG_YAML_PATH, 'w') as file: - yaml.dump(DEFAULT_CHAT2DB_CONFIG, file, default_flow_style=False) - response = update_config(args.ip, args.port) - with open(CONFIG_YAML_PATH, 'r') as file: - yml = yaml.safe_load(file) - config = { - 'UVICORN_IP': yml.get('UVICORN_IP'), - 'UVICORN_PORT': yml.get('UVICORN_PORT'), - } - print(response.get("message")) - elif not exist: - print("please update_config first") - - elif args.command == "add_db": - response = call_add_database_info(args.database_url) - database_id = response.get("result")['database_id'] - print(response.get("message")) - if response.get("code") == 200: - print(f'database_id: ', database_id) - - elif args.command == "del_db": - response = call_del_database_info(args.database_id) - print(response.get("message")) - - elif args.command == "query_db": - response = call_query_database_info() - print(response.get("message")) - if response.get("code") == 200: - database_info = response.get("result")['database_info_list'] - for database in database_info: - print('-' * terminal_width) - print("database_id:", database["database_id"]) - 
print("database_url:", database["database_url"]) - print("created_at:", database["created_at"]) - print('-' * terminal_width) - - elif args.command == "list_tb_in_db": - response = call_list_table_in_database(args.database_id, args.table_filter) - print(response.get("message")) - if response.get("code") == 200: - table_name_list = response.get("result")['table_name_list'] - for table_name in table_name_list: - print(table_name) - - elif args.command == "add_tb": - response = call_add_table_info(args.database_id, args.table_name) - print(response.get("message")) - table_id = response.get("result")['table_id'] - if response.get("code") == 200: - print('table_id: ', table_id) - - elif args.command == "del_tb": - response = call_del_table_info(args.table_id) - print(response.get("message")) - - elif args.command == "query_tb": - response = call_query_table_info(args.database_id) - print(response.get("message")) - if response.get("code") == 200: - table_list = response.get("result")['table_info_list'] - for table in table_list: - print('-' * terminal_width) - print("table_id:", table['table_id']) - print("table_name:", table['table_name']) - print("table_note:", table['table_note']) - print("created_at:", table['created_at']) - print('-' * terminal_width) - - elif args.command == "query_col": - response = call_query_column(args.table_id) - print(response.get("message")) - if response.get("code") == 200: - column_list = response.get("result")['column_info_list'] - for column in column_list: - print('-' * terminal_width) - print("column_id:", column['column_id']) - print("column_name:", column['column_name']) - print("column_note:", column['column_note']) - print("column_type:", column['column_type']) - print("enable:", column['enable']) - print('-' * terminal_width) - - elif args.command == "enable_col": - response = call_enable_column(args.column_id, args.enable) - print(response.get("message")) - - elif args.command == "add_sql_exp": - def get_sql_exp(dir): - if not 
os.path.exists(os.path.dirname(dir)): - return None - # 读取 xlsx 文件 - df = pd.read_excel(dir) - - # 遍历每一行数据 - for index, row in df.iterrows(): - question = row['question'] - sql = row['sql'] - - # 调用 call_add_sql_example 函数 - response = call_add_sql_example(args.table_id, question, sql) - print(response.get("message")) - sql_example_id = response.get("result")['sql_example_id'] - print('sql_example_id: ', sql_example_id) - print(question, sql) - - - if args.dir: - get_sql_exp(args.dir) - else: - response = call_add_sql_example(args.table_id, args.question, args.sql) - print(response.get("message")) - sql_example_id = response.get("result")['sql_example_id'] - print('sql_example_id: ', sql_example_id) - - elif args.command == "del_sql_exp": - response = call_del_sql_example(args.sql_example_id) - print(response.get("message")) - - elif args.command == "query_sql_exp": - response = call_query_sql_example(args.table_id) - print(response.get("message")) - if response.get("code") == 200: - sql_example_list = response.get("result")['sql_example_list'] - for sql_example in sql_example_list: - print('-' * terminal_width) - print("sql_example_id:", sql_example['sql_example_id']) - print("question:", sql_example['question']) - print("sql:", sql_example['sql']) - print('-' * terminal_width) - - elif args.command == "update_sql_exp": - response = call_update_sql_example(args.sql_example_id, args.question, args.sql) - print(response.get("message")) - - elif args.command == "generate_sql_exp": - response = call_generate_sql_example(args.table_id, args.generate_cnt, args.sql_var) - print(response.get("message")) - if response.get("code") == 200: - # 输出到execl中 - sql_example_list = response.get("result")['sql_example_list'] - write_sql_example_to_excel(args.dir, sql_example_list) - else: - print("未知命令,请检查输入的命令是否正确。") diff --git a/chat2db/security/security.py b/chat2db/security/security.py deleted file mode 100644 index 0909f27..0000000 --- a/chat2db/security/security.py +++ 
/dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - -import base64 -import binascii -import hashlib -import secrets - -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes - -from chat2db.config.config import config - - -class Security: - - @staticmethod - def encrypt(plaintext: str) -> tuple[str, dict]: - """ - 加密公共方法 - :param plaintext: - :return: - """ - half_key1 = config['HALF_KEY1'] - - encrypted_work_key, encrypted_work_key_iv = Security._generate_encrypted_work_key( - half_key1) - encrypted_plaintext, encrypted_iv = Security._encrypt_plaintext(half_key1, encrypted_work_key, - encrypted_work_key_iv, plaintext) - del plaintext - secret_dict = { - "encrypted_work_key": encrypted_work_key, - "encrypted_work_key_iv": encrypted_work_key_iv, - "encrypted_iv": encrypted_iv, - "half_key1": half_key1 - } - return encrypted_plaintext, secret_dict - - @staticmethod - def decrypt(encrypted_plaintext: str, secret_dict: dict): - """ - 解密公共方法 - :param encrypted_plaintext: 待解密的字符串 - :param secret_dict: 存放工作密钥的dict - :return: - """ - plaintext = Security._decrypt_plaintext(half_key1=secret_dict.get("half_key1"), - encrypted_work_key=secret_dict.get( - "encrypted_work_key"), - encrypted_work_key_iv=secret_dict.get( - "encrypted_work_key_iv"), - encrypted_iv=secret_dict.get( - "encrypted_iv"), - encrypted_plaintext=encrypted_plaintext) - return plaintext - - @staticmethod - def _get_root_key(half_key1: str) -> bytes: - half_key2 = config['HALF_KEY2'] - key = (half_key1 + half_key2).encode("utf-8") - half_key3 = config['HALF_KEY3'].encode("utf-8") - hash_key = hashlib.pbkdf2_hmac("sha256", key, half_key3, 10000) - return binascii.hexlify(hash_key)[13:45] - - @staticmethod - def _generate_encrypted_work_key(half_key1: str) -> tuple[str, str]: - bin_root_key = Security._get_root_key(half_key1) - bin_work_key = secrets.token_bytes(32) - 
bin_encrypted_work_key_iv = secrets.token_bytes(16) - bin_encrypted_work_key = Security._root_encrypt(bin_root_key, bin_encrypted_work_key_iv, bin_work_key) - encrypted_work_key = base64.b64encode(bin_encrypted_work_key).decode("ascii") - encrypted_work_key_iv = base64.b64encode(bin_encrypted_work_key_iv).decode("ascii") - return encrypted_work_key, encrypted_work_key_iv - - @staticmethod - def _get_work_key(half_key1: str, encrypted_work_key: str, encrypted_work_key_iv: str) -> bytes: - bin_root_key = Security._get_root_key(half_key1) - bin_encrypted_work_key = base64.b64decode(encrypted_work_key.encode("ascii")) - bin_encrypted_work_key_iv = base64.b64decode(encrypted_work_key_iv.encode("ascii")) - return Security._root_decrypt(bin_root_key, bin_encrypted_work_key_iv, bin_encrypted_work_key) - - @staticmethod - def _root_encrypt(key: bytes, encrypted_iv: bytes, plaintext: bytes) -> bytes: - encryptor = Cipher(algorithms.AES(key), modes.GCM(encrypted_iv), default_backend()).encryptor() - encrypted = encryptor.update(plaintext) + encryptor.finalize() - return encrypted - - @staticmethod - def _root_decrypt(key: bytes, encrypted_iv: bytes, encrypted: bytes) -> bytes: - encryptor = Cipher(algorithms.AES(key), modes.GCM(encrypted_iv), default_backend()).encryptor() - plaintext = encryptor.update(encrypted) - return plaintext - - @staticmethod - def _encrypt_plaintext(half_key1: str, encrypted_work_key: str, encrypted_work_key_iv: str, - plaintext: str) -> tuple[str, str]: - bin_work_key = Security._get_work_key(half_key1, encrypted_work_key, encrypted_work_key_iv) - salt = f"{half_key1}{plaintext}" - plaintext_temp = salt.encode("utf-8") - del plaintext - del salt - bin_encrypted_iv = secrets.token_bytes(16) - bin_encrypted_plaintext = Security._root_encrypt(bin_work_key, bin_encrypted_iv, plaintext_temp) - encrypted_plaintext = base64.b64encode(bin_encrypted_plaintext).decode("ascii") - encrypted_iv = base64.b64encode(bin_encrypted_iv).decode("ascii") - return 
encrypted_plaintext, encrypted_iv - - @staticmethod - def _decrypt_plaintext(half_key1: str, encrypted_work_key: str, encrypted_work_key_iv: str, - encrypted_plaintext: str, encrypted_iv) -> str: - bin_work_key = Security._get_work_key(half_key1, encrypted_work_key, encrypted_work_key_iv) - bin_encrypted_plaintext = base64.b64decode(encrypted_plaintext.encode("ascii")) - bin_encrypted_iv = base64.b64decode(encrypted_iv.encode("ascii")) - plaintext_temp = Security._root_decrypt(bin_work_key, bin_encrypted_iv, bin_encrypted_plaintext) - plaintext_salt = plaintext_temp.decode("utf-8") - plaintext = plaintext_salt[len(half_key1):] - return plaintext \ No newline at end of file diff --git a/chat2db/templetes/change_txt_to_yaml.py b/chat2db/templetes/change_txt_to_yaml.py deleted file mode 100644 index 8e673d8..0000000 --- a/chat2db/templetes/change_txt_to_yaml.py +++ /dev/null @@ -1,92 +0,0 @@ -import yaml -text = { - 'sql_generate_base_on_example_prompt': '''你是一个数据库专家,你的任务是参考给出的表结构以及表注释和示例,基于给出的问题生成一条在{database_url}连接下可进行查询的sql语句。 -注意: -#01 sql语句中,特殊字段需要带上双引号。 -#02 sql语句中,如果要使用 as,请用双引号把别名包裹起来。 -#03 sql语句中,查询字段必须使用`distinct`关键字去重。 -#04 sql语句中,只返回生成的sql语句, 不要返回其他任何无关的内容 -#05 sql语句中,参考问题,对查询字段进行冗余。 -#06 sql语句中,需要以分号结尾。 - -以下是表结构以及表注释: -{note} -以下是{k}个示例: -{sql_example} -以下是问题: -{question} -''', - 'question_generate_base_on_data_prompt': '''你是一个postgres数据库专家,你的任务是根据给出的表结构和表内数据,输出一个用户可能针对这张表内的信息提出的问题。 -注意: -#01 问题内容和形式需要多样化,例如要用到统计、排序、模糊匹配等相关问题。 -#02 要以口语化的方式输出问题,不要机械的使用表内字段输出问题。 -#03 不要输出问题之外多余的内容! 
-#04 要基于用户的角度取提出问题,问题内容需要口语化、拟人化。 -#05 优先生成有注释的字段相关的sql语句。 - -以下是表结构和注释: -{note} -以下是表内数据 -{data_frame} -''', - 'sql_generate_base_on_data_prompt': '''你是一个postgres数据库专家,你的任务是参考给出的表结构以及表注释和表内数据,基于给出的问题生成一条查询{database_type}的sql语句。 -注意: -#01 sql语句中,特殊字段需要带上双引号。 -#02 sql语句中,如果要使用 as,请用双引号把别名包裹起来。 -#03 sql语句中,查询字段必须使用`distinct`关键字去重。 -#04 sql语句中,只返回生成的sql语句, 不要返回其他任何无关的内容 -#05 sql语句中,参考问题,对查询字段进行冗余。 -#06 sql语句中,需要以分号结尾。 - -以下是表结构以及表注释: -{note} -以下是表内的数据: -{data_frame} -以下是问题: -{question} -''', - 'sql_expand_prompt': '''你是一个数据库专家,你的任务是参考给出的表结构以及表注释、执行失败的sql和执行失败的报错,基于给出的问题修改执行失败的sql生成一条在{database_type}连接下可进行查询的sql语句。 - - 注意: - - #01 假设sql中有特殊字符干扰了sql的执行,请优先替换这些特殊字符保证sql可执行。 - - #02 假设sql用于检索或者过滤的字段导致了sql执行的失败,请尝试替换这些字段保证sql可执行。 - - #03 假设sql检索结果为空,请尝试将 = 的匹配方式替换为 ilike \'\%\%\' 保证sql执行给出结果。 - - #04 假设sql检索结果为空,可以使用问题中的关键字的子集作为sql的过滤条件保证sql执行给出结果。 - - 以下是表结构以及表注释: - - {note} - - 以下是执行失败的sql: - - {sql_failed} - - 以下是执行失败的报错: - - {sql_failed_message} - - 以下是问题: - - {question} -''', - 'table_choose_prompt': '''你是一个数据库专家,你的任务是参考给出的表名以及表的条目(主键,表名、表注释),输出最适配于问题回答检索的{table_cnt}张表,并返回表对应的主键。 -注意: -#01 输出的表名用python的list格式返回,下面是list的一个示例: -[\"prime_key1\",\"prime_key2\"]。 -#02 只输出包含主键的list即可不要输出其他内容!!! -#03 list重主键的顺序,按表与问题的适配程度从高到底排列。 -#04 若无任何一张表适用于问题的回答,请返回空列表。 - -以下是表的条目: -{table_entries} -以下是问题: -{question} -''' -} -print(text) -with open('./prompt.yaml', 'w', encoding='utf-8') as f: - yaml.dump(text, f, allow_unicode=True) diff --git a/chat2db/templetes/prompt.yaml b/chat2db/templetes/prompt.yaml deleted file mode 100644 index 0013b12..0000000 --- a/chat2db/templetes/prompt.yaml +++ /dev/null @@ -1,115 +0,0 @@ -question_generate_base_on_data_prompt: '你是一个postgres数据库专家,你的任务是根据给出的表结构和表内数据,输出一个用户可能针对这张表内的信息提出的问题。 - - 注意: - - #01 问题内容和形式需要多样化,例如要用到统计、排序、模糊匹配等相关问题。 - - #02 要以口语化的方式输出问题,不要机械的使用表内字段输出问题。 - - #03 不要输出问题之外多余的内容! 
- - #04 要基于用户的角度取提出问题,问题内容需要口语化、拟人化。 - - #05 优先生成有注释的字段相关的sql语句。 - - #06 不要对生成的sql进行解释。 - - 以下是表结构和注释: - - {note} - - 以下是表内数据 - - {data_frame} - - ' -sql_expand_prompt: "你是一个数据库专家,你的任务是参考给出的表结构以及表注释、执行失败的sql和执行失败的报错,基于给出的问题修改执行失败的sql生成一条在{database_type}连接下可进行查询的sql语句。\n\ - \n 注意:\n\n #01 假设sql中有特殊字符干扰了sql的执行,请优先替换这些特殊字符保证sql可执行。\n\n #02 假设sql用于检索或者过滤的字段导致了sql执行的失败,请尝试替换这些字段保证sql可执行。\n\ - \n #03 假设sql检索结果为空,请尝试将 = 的匹配方式替换为 ilike '\\%\\%' 保证sql执行给出结果。\n\n #04 假设sql检索结果为空,可以使用问题中的关键字的子集作为sql的过滤条件保证sql执行给出结果。\n\ - \n 以下是表结构以及表注释:\n\n {note}\n\n 以下是执行失败的sql:\n\n {sql_failed}\n\n 以下是执行失败的报错:\n\ - \n {sql_failed_message}\n\n 以下是问题:\n\n {question}\n" -sql_generate_base_on_data_prompt: '你是一个postgres数据库专家,你的任务是参考给出的表结构以及表注释和表内数据,基于给出的问题生成一条查询{database_type}的sql语句。 - - 注意: - - #01 sql语句中,特殊字段需要带上双引号。 - - #02 sql语句中,如果要使用 as,请用双引号把别名包裹起来。 - - #03 sql语句中,查询字段必须使用`distinct`关键字去重。 - - #04 sql语句中,只返回生成的sql语句, 不要返回其他任何无关的内容 - - #05 sql语句中,参考问题,对查询字段进行冗余。 - - #06 sql语句中,需要以分号结尾。 - - #07 不要对生成的sql进行解释。 - - 以下是表结构以及表注释: - - {note} - - 以下是表内的数据: - - {data_frame} - - 以下是问题: - - {question} - - ' -sql_generate_base_on_example_prompt: '你是一个数据库专家,你的任务是参考给出的表结构以及表注释和示例,基于给出的问题生成一条在{database_url}连接下可进行查询的sql语句。 - - 注意: - - #01 sql语句中,特殊字段需要带上双引号。 - - #02 sql语句中,如果要使用 as,请用双引号把别名包裹起来。 - - #03 sql语句中,查询字段必须使用`distinct`关键字去重。 - - #04 sql语句中,只返回生成的sql语句, 不要返回其他任何无关的内容 - - #05 sql语句中,参考问题,对查询字段进行冗余。 - - #06 sql语句中,需要以分号结尾。 - - - 以下是表结构以及表注释: - - {note} - - 以下是{k}个示例: - - {sql_example} - - 以下是问题: - - {question} - - ' -table_choose_prompt: '你是一个数据库专家,你的任务是参考给出的表名以及表的条目(主键,表名、表注释),输出最适配于问题回答检索的{table_cnt}张表,并返回表对应的主键。 - - 注意: - - #01 输出的表名用python的list格式返回,下面是list的一个示例: - - ["prime_key1","prime_key2"]。 - - #02 只输出包含主键的list即可不要输出其他内容!!! 
- - #03 list重主键的顺序,按表与问题的适配程度从高到底排列。 - - #04 若无任何一张表适用于问题的回答,请返回空列表。 - - - 以下是表的条目: - - {table_entries} - - 以下是问题: - - {question} - - ' diff --git a/data_chain/apps/base/convertor.py b/data_chain/apps/base/convertor.py index 69aeead..703a15a 100644 --- a/data_chain/apps/base/convertor.py +++ b/data_chain/apps/base/convertor.py @@ -358,10 +358,8 @@ class Convertor: """将任务实体和任务报告实体转换为任务""" try: task_completed = 0 - finished_time = None if task_report is not None: task_completed = task_report.current_stage/task_report.stage_cnt*100 - finished_time = task_report.created_time.strftime('%Y-%m-%d %H:%M') task = Task( opId=task_entity.op_id, opName=task_entity.op_name, @@ -369,7 +367,6 @@ class Convertor: taskStatus=TaskStatus(task_entity.status), taskType=TaskType(task_entity.type), taskCompleted=task_completed, - finishedTime=finished_time, createdTime=task_entity.created_time.strftime('%Y-%m-%d %H:%M') ) return task diff --git a/data_chain/entities/request_data.py b/data_chain/entities/request_data.py index e76f3c9..9685ee1 100644 --- a/data_chain/entities/request_data.py +++ b/data_chain/entities/request_data.py @@ -50,7 +50,7 @@ class ListTeamUserRequest(BaseModel): class CreateTeamRequest(BaseModel): team_name: str = Field(default='这是一个默认的团队名称', min_length=1, max_length=30, alias="teamName") description: str = Field(default='', max_length=150) - is_public: bool = Field(default=False, alias="isPublic") + is_public: bool = Field(default=False) class UpdateTeamRequest(BaseModel): diff --git a/data_chain/entities/response_data.py b/data_chain/entities/response_data.py index 37fdbbf..43c7257 100644 --- a/data_chain/entities/response_data.py +++ b/data_chain/entities/response_data.py @@ -195,7 +195,6 @@ class Task(BaseModel): task_status: TaskStatus = Field(description="任务状态", alias="taskStatus") task_type: TaskType = Field(description="任务类型", alias="taskType") task_completed: float = Field(description="任务完成度", alias="taskCompleted") - finished_time: Optional[str] = 
Field(default=None, description="任务完成时间", alias="finishedTime") created_time: str = Field(description="任务创建时间", alias="createdTime") -- Gitee