diff --git a/pyminer2/config/extensions.json b/pyminer2/config/extensions.json index 8d77d843ce39845f57809732ad1c2b03dbe32cdf..5d46fc8d05608ba52559981ae8f991c180067669 100644 --- a/pyminer2/config/extensions.json +++ b/pyminer2/config/extensions.json @@ -49,5 +49,8 @@ }, "jsonrpc-dataserver": { "enabled": true + }, + "__pycache__": { + "enabled": true } } \ No newline at end of file diff --git a/pyminer2/extensions/extensionlib/extension_lib.py b/pyminer2/extensions/extensionlib/extension_lib.py index 58bf32e0f9247938b2f25100decd476dceaa405b..9c85ea5b5c20128b28351f48014f049e2d7ec249 100644 --- a/pyminer2/extensions/extensionlib/extension_lib.py +++ b/pyminer2/extensions/extensionlib/extension_lib.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: def wrapper(): from pyminer2.extensions.extensions_manager.manager import extensions_manager from pyminer2.workspace.datamanager.datamanager import data_manager - from pyminer2.workspace.datamanager.converter import ConverterError + from pyminer2.workspace.datamanager.converter import ConverterError from pyminer2.workspace.datamanager.exceptions import ConflictError, NotFoundError from pyminer2.workspace.datamanager.metadataset import WouldBlockError from pyminer2.pmutil import get_root_dir, get_main_window, get_application @@ -185,7 +185,7 @@ def wrapper(): data_manager.on_deletion(deletion_callback) def get_converter_error(self) -> type: - return ConverterError + return ConverterError def get_conflict_error(self) -> type: return ConflictError diff --git a/pyminer2/workspace/datamanager/converter.py b/pyminer2/workspace/datamanager/converter.py index 01b5ab2161ea3bf3b8430fa4a140d0dfa44221ff..54957efd8f05d40e4eb42b74bcadd1989064b016 100644 --- a/pyminer2/workspace/datamanager/converter.py +++ b/pyminer2/workspace/datamanager/converter.py @@ -1,10 +1,8 @@ import numpy as np import pandas as pd -from pyminer2.workspace.datamanager.variable import Variable - -class ConverterError(Exception): - pass +from 
pyminer2.workspace.datamanager.variable import Variable +from .exceptions import ConverterError class Converter: @@ -35,11 +33,14 @@ class Converter: # convert to data, func format: convert_obj def convert_ndarray(self, arr: np.ndarray) -> dict: + # TODO (panhaoyu) 三维数组甚至四维数组都是很常见的数据格式,应该支持 if arr.dtype in (np.int, np.float): if len(arr.shape) == 2: return Variable('Matrix', {'value': arr.tolist()}).dump() elif len(arr.shape) == 1: return Variable('Vector', {'value': arr.tolist()}).dump() + else: + raise ConverterError else: raise ConverterError(f'{arr} is inconvertible') @@ -47,9 +48,10 @@ class Converter: return self.convert_ndarray(np.array(lst)) def convert_dataframe(self, dataframe: pd.DataFrame) -> dict: - return Variable('DataFrame', {'table': dataframe.values.tolist(), 'columns': dataframe.columns}) + return Variable('DataFrame', {'table': dataframe.values.tolist(), 'columns': dataframe.columns.tolist()}) # convert to var, func format: iconvert_type + # TODO (panhaoyu) 这三个函数目前没有在pycharm中发现调用,是否可以删除? 
def iconvert_matrix(self, mat: Variable) -> np.ndarray: assert mat.type == 'Matrix' @@ -62,17 +64,3 @@ class Converter: def iconvert_dataframe(self, dataframe: Variable) -> pd.DataFrame: assert dataframe.type == 'DataFrame' return pd.DataFrame(dataframe['table'], columns=dataframe['columns']) - - -def main(): - c = Converter() - arr = np.array([[1, 2, 3], [3, 2, 1]]) - mat = c.convert_to_data(arr) - print(mat) - - arr1 = c.convert_to_var(mat) - print(arr1, type(arr1)) - - -if __name__ == "__main__": - main() diff --git a/pyminer2/workspace/datamanager/datamanager.py b/pyminer2/workspace/datamanager/datamanager.py index 0967fb6fcb52ad3efc272c0602fcb5ecf0c46c4f..6633dd6f44d1b7db97b3e093cde1d673d97b3650 100644 --- a/pyminer2/workspace/datamanager/datamanager.py +++ b/pyminer2/workspace/datamanager/datamanager.py @@ -8,7 +8,6 @@ from pyminer2.workspace.datamanager.varset import VarSet class DataManager: - def __init__(self): self.dataset = DataSet() self.varset = VarSet() @@ -27,112 +26,115 @@ class DataManager: # VarSet related - def get_all_var(self) -> dict: + def get_all_var(self) -> VarSet: return self.varset - def get_var(self, varname: str): - if varname not in self.varset: - raise NotFoundError(f'{varname} not found') - return self.varset[varname] - - def get_data_info(self, varname: str) -> dict: - if varname not in self.metadataset: - raise NotFoundError(f'{varname} not found') - return self.metadataset[varname] - - def set_var_dict(self, variables:dict, provider='unknown', info_dict:dict={}): + def get_var(self, key: str): + if key not in self.varset: + raise NotFoundError(f'{key} not found') + return self.varset[key] + + def get_data_info(self, key: str) -> dict: + if key not in self.metadataset: + raise NotFoundError(f'{key} not found') + return self.metadataset[key] + + def set_var_dict(self, variables: dict, provider='unknown', info_dict=None): + # TODO (panhaoyu) 检查这里的逻辑是否正确 + # 是否需要在最后收集错误,然后再返回?程序报错后,是中止程序,还是允许程序继续运行? 
+ if info_dict is None: + info_dict = {} err_list = [] - for varname in variables: - info = info_dict.get(varname, {}) + for key in variables: + info = info_dict.get(key, {}) try: - self.set_var(varname, variables[varname], provider, **info) + self.set_var(key, variables[key], provider, **info) except ConflictError as e: err_list.append(e) - pass if err_list: raise ConflictError('\n'.join([str(e) for e in err_list])) - def set_var(self, varname: str, variable, provider='unknown', **info): + def set_var(self, key: str, value, provider='unknown', **info): # it's recommended to give provider - with self.metadataset.lock_data(varname): - old_var = self.varset.get(varname, None) - self.varset.set_var(varname, variable) + with self.metadataset.lock_data(key): + old_var = self.varset.get(key, None) + self.varset.set_var(key, value) if old_var is not None: - self.historyset.push(varname, old_var) - if varname in self.metadataset: - self.metadataset.modify_data(varname, provider) - self.metadataset.update(varname, **info) + self.historyset.push(key, old_var) + if key in self.metadataset: + self.metadataset.modify_data(key, provider) + self.metadataset.update(key, **info) else: meta_data = MetaData(provider, **info) - self.metadataset.define_data(varname, meta_data) - - def update_data_info(self, varname: str, **info): - with self.metadataset.lock_data(varname): - if varname not in self.metadataset: - raise NotFoundError(f'{varname} not found') - self.metadataset.update(varname, **info) - - def delete_data(self, varname: str): - with self.metadataset.lock_data(varname): - if varname not in self.varset: - raise NotFoundError(f'{varname} not found') - self.recyclebin.discard(varname, self.varset[varname]) - self.varset.pop(varname) - self.metadataset.delete_data(varname) + self.metadataset.define_data(key, meta_data) + + def update_data_info(self, key: str, **info): + with self.metadataset.lock_data(key): + if key not in self.metadataset: + raise NotFoundError(f'{key} not 
found') + self.metadataset.update(key, **info) + + def delete_data(self, key: str): + with self.metadataset.lock_data(key): + if key not in self.varset: + raise NotFoundError(f'{key} not found') + self.recyclebin.discard(key, self.varset[key]) + self.varset.pop(key) + self.metadataset.delete_data(key) def clear(self): - for varname in self.metadataset: - self.delete_data(varname) + for key in self.metadataset: + self.delete_data(key) def get_recyclebin(self) -> list: return [r for r in self.recyclebin] def restore(self, index: int): - varname = self.recyclebin.get_varname(index) - with self.metadataset.lock_data(varname): - varname, var_to_restore = self.recyclebin.restore( - index, self.varset.get(varname, None)) - self.set_var(varname, var_to_restore) - self.metadataset.restore_data(varname) - - def cancel(self, varname): - with self.metadataset.lock_data(varname): - if varname not in self.historyset: - raise NotFoundError(f'{varname} has no history') - variable = self.historyset.stepback(varname, self.varset[varname]) - self.varset.set_var(varname, variable) - - def redo(self, varname): - with self.metadataset.lock_data(varname): - if varname not in self.historyset: - raise NotFoundError(f'{varname} has no history') - variable = self.historyset.stepforward(varname) - self.varset.set_var(varname, variable) + key = self.recyclebin.get_varname(index) + with self.metadataset.lock_data(key): + key, var_to_restore = self.recyclebin.restore( + index, self.varset.get(key, None)) + self.set_var(key, var_to_restore) + self.metadataset.restore_data(key) + + def cancel(self, key): + with self.metadataset.lock_data(key): + if key not in self.historyset: + raise NotFoundError(f'{key} has no history') + variable = self.historyset.stepback(key, self.varset[key]) + self.varset.set_var(key, variable) + + def redo(self, key): + with self.metadataset.lock_data(key): + if key not in self.historyset: + raise NotFoundError(f'{key} has no history') + variable = 
self.historyset.stepforward(key) + self.varset.set_var(key, variable) # DataSet related - def read_data(self, varname: str) -> dict: - with self.metadataset.lock_data(varname): - if varname not in self.metadataset or self.metadataset[varname]['deleted']: - raise NotFoundError(f'{varname} not found') - metadata = self.metadataset[varname] + def read_data(self, key: str) -> dict: + with self.metadataset.lock_data(key): + if key not in self.metadataset or self.metadataset[key]['deleted']: + raise NotFoundError(f'{key} not found') + metadata = self.metadataset[key] if not metadata['synchronised']: data = self.converter.convert_to_data( - self.varset.get_var(varname)) - self.dataset.synchronise(varname, data) - self.metadataset.synchronise_data(varname) - return self.dataset.read(varname) - - def write_data(self, varname: str, data: dict, provider='server'): - with self.metadataset.lock_data(varname): - self.dataset.write(varname, data) - obj = self.dataset[varname] + self.varset.get_var(key)) + self.dataset.synchronise(key, data) + self.metadataset.synchronise_data(key) + return self.dataset.read(key) + + def write_data(self, key: str, data: dict, provider='server'): + with self.metadataset.lock_data(key): + self.dataset.write(key, data) + obj = self.dataset[key] var = self.converter.convert_to_var(obj) - self.set_var(varname, var, provider) - self.metadataset.synchronise_data(varname) + self.set_var(key, var, provider) + self.metadataset.synchronise_data(key) - def lock_data(self, varname: str): - self.metadataset.lock_data(varname) + def lock_data(self, key: str): + self.metadataset.lock_data(key) def on_modification(self, modification_callback): self.modification_callback_actions.append(modification_callback) @@ -141,8 +143,15 @@ class DataManager: self.deletion_callback_actions.append(deletion_callback) def add_callbacks(self): + """ + 通过对varset添加装饰器以实现对变量的修改和删除的回调控制 + :return: + """ + + # TODO (panhaoyu) 如果需要回调函数,应该将回调函数作为参数写成register,可能会稍微优雅一点 - def 
modication_decoration(set_var): + # TODO (panhaoyu) 回调函数的添加方式非常不优雅,可以考虑优化 + def modification_decorator(set_var): def wrapper(varname: str, variable): set_var(varname, variable) for callback in self.modification_callback_actions: @@ -150,13 +159,13 @@ class DataManager: return wrapper - self.varset.set_var = modication_decoration(self.varset.set_var) + self.varset.set_var = modification_decorator(self.varset.set_var) def deletion_decoration(delete_data): - def wrapper(varname: str): - delete_data(varname) + def wrapper(key: str): + delete_data(key) for callback in self.deletion_callback_actions: - callback(varname) + callback(key) return wrapper @@ -167,6 +176,7 @@ data_manager = DataManager() def main(): + # 前半段为datamanager的测试,已重写为测试用例 import numpy as np datamanager = DataManager() @@ -239,6 +249,7 @@ def main(): print('read arr', datamanager.read_data('arr')) print('get mat', datamanager.get_var('mat')) + # TODO (panhaoyu) 以下是dataserver部分的测试,本次提交没有进行更新 from pyminer2.workspace.dataserver.dataserver import DataServer def test(datamanager=datamanager): diff --git a/pyminer2/workspace/datamanager/dataset.py b/pyminer2/workspace/datamanager/dataset.py index ebda55385c5ef32827a06e7c46f135f2e9ad4989..edc41da7f82754ab7e510230272453d1385b72c6 100644 --- a/pyminer2/workspace/datamanager/dataset.py +++ b/pyminer2/workspace/datamanager/dataset.py @@ -1,51 +1,54 @@ import re -# from pyminer2.workspace.datamanager.exceptions import ConflictError +from pyminer2.workspace.datamanager.exceptions import ConflictError class DataSet(dict): + """ + 这个类主要用于对变量进行管理,包括以下内容: + * 添加内置类型,定义新的类型 + * 对变量根据类型进行校核,支持递归校核 + """ + def __init__(self): super().__init__() - self.__insert_builtin_type__( - 'Type', { - 'type': 'Type', 'structure': { - 'structure': 'dict'}}) - self.__insert_builtin_type__( - 'Complex', { - 'type': 'Type', 'structure': { - 'real': 'float', 'imag': 'float'}}) - self.__insert_builtin_type__( - 'Matrix', { - 'type': 'Type', 'structure': { - 'value': [ - 
['float|int|Complex']]}}) - self.__insert_builtin_type__( - 'Vector', { - 'type': 'Type', 'structure': { - 'value': ['float|int|Complex']}}) - self.__insert_builtin_type__( - 'TimeSeries', { - 'type': 'Type', 'structure': { - 'time': ['float|int'], 'data': ['float|int']}}) - self.__insert_builtin_type__( - 'StateSpace', { - 'type': 'Type', - 'structure': {'A': 'Matrix', - 'B': 'Matrix', - 'C': 'Matrix', - 'D': 'Matrix', - 'x': ['str'], - 'y': ['str'], - 'u': ['str'], - 'sys': 'str'}}) - self.__insert_builtin_type__( - 'DataFrame', { - 'type': 'Type', - 'structure': {'table': [['float|int|Complex|str']], - 'columns': ['str'],}}) - self.__insert_builtin_type__( - 'Series', { - 'type': 'Type', - 'structure': {'value': [['float|int|Complex|str']]}}) + self.__insert_builtin_type__('Type', {'type': 'Type', 'structure': { + 'structure': 'dict', + }}) + # TODO (panhaoyu) 把这种基本数据类型进行如此的定义,性能不会受影响吗? + # TODO (panhaoyu) 内置数据类型是否过多? + self.__insert_builtin_type__('Complex', {'type': 'Type', 'structure': { + 'real': 'float', + 'imag': 'float', + }}) + self.__insert_builtin_type__('Matrix', {'type': 'Type', 'structure': { + 'value': [['float|int|Complex']], + }}) + self.__insert_builtin_type__('Vector', {'type': 'Type', 'structure': { + 'value': ['float|int|Complex'], + }}) + self.__insert_builtin_type__('TimeSeries', {'type': 'Type', 'structure': { + 'time': ['float|int'], + 'data': ['float|int'], + }}) + self.__insert_builtin_type__('StateSpace', {'type': 'Type', 'structure': { + 'A': 'Matrix', + 'B': 'Matrix', + 'C': 'Matrix', + 'D': 'Matrix', + 'x': ['str'], + 'y': ['str'], + 'u': ['str'], + 'sys': 'str', + }}) + self.__insert_builtin_type__('DataFrame', {'type': 'Type', 'structure': { + 'table': [['float|int|Complex|str']], + 'columns': ['str'], + }}) + + # TODO (panhaoyu) series与矩阵有什么区别呢? 
+ self.__insert_builtin_type__('Series', {'type': 'Type', 'structure': { + 'value': [['float|int|Complex|str']], + }}) self.builtin_types = self.select_type('Type') def __insert_builtin_type__(self, key: str, obj: dict): @@ -76,82 +79,52 @@ class DataSet(dict): else: return True - def compare(self, obj, req): - if isinstance(req, dict): - for key in req: - req_val = req[key] + def compare(self, obj, structure) -> None: + """ + 用于判断某个对象是否符合给定的结构。 + 目标结构可以是以下内容: + 字典:递归检则每一个键是否符合要求 + 列表:检测列表对象中的每一项是否都是给定的结构 + 字符串: + 字符串内包含“|”分割符:表示可能是以下类型之一 + 字符串是int,float,str,list,dict中的一种:检测对象是否是相应的Python类型 + 其他字符串:从内置类型列表中查询该类型并进行检测 + :param obj: 待检测的对象 + :param structure: 目标结构 + :return: 无返回值,如果比较失败则报错 + """ + if isinstance(structure, dict): + for key in structure: + req_val = structure[key] obj_val = obj[key] self.compare(obj_val, req_val) - elif isinstance(req, list): - req_type = req[0] + elif isinstance(structure, list): + req_type = structure[0] for item in obj: self.compare(item, req_type) else: - assert isinstance(req, str) - if '|' in req: + assert isinstance(structure, str) + if '|' in structure: valid = False - for sub_req in req.split('|'): + for sub_structure in structure.split('|'): try: - self.compare(obj, sub_req) + self.compare(obj, sub_structure) valid = True except AssertionError: pass assert valid - elif req in ('list', 'dict', 'float', 'int', 'str'): - assert type(obj).__name__ == req + elif structure in ('list', 'dict', 'float', 'int', 'str'): + assert type(obj).__name__ == structure else: - assert isinstance(obj, dict) and obj.get('type', '') == req - type_def = self[req] + assert isinstance(obj, dict) and obj.get('type', '') == structure + type_def = self[structure] structure = type_def['structure'] self.compare(obj, structure) def select_type(self, type_name: str): + # TODO (panhaoyu) 这个函数的意义何在?请补充注释 dct = {} - for varname, variable in self.items(): - if variable['type'] == type_name: - dct[varname] = variable + for key, value in self.items(): + if 
value['type'] == type_name: + dct[key] = value return dct - - -def main(): - dataSet = DataSet() - mat = {'type': 'Matrix', 'value': [[1, 2, 3], [3, 2, 1]]} - nonmat = {'type': 'Matrix', 'value': [1, 2, 3, 3, 2, 1]} - ts = {'type': 'TimeSeries', 'time': [1, 2, 3], 'data': [3, 2, 1]} - nonts = {'type': 'TimeSeries', 'time': [1, 2, 3], 'mdata': [3, 2, 1]} - ss = {'type': 'statespace', - 'A': {'type': 'Matrix', 'value': [[1, 2], [2, 1]], }, - 'B': {'type': 'Matrix', 'value': [[2], [1]], }, - 'C': {'type': 'Matrix', 'value': [[1, 2]], }, - 'D': {'type': 'Matrix', 'value': [[0]], }, - 'x': ['x1', 'x2'], 'y': ['column'], 'u': ['u'], 'sys': 'str'} - nonss = {'type': 'StateSpace', - 'A': {'type': 'TimeSeries', 'time': [1, 2, 3], 'mdata': [3, 2, 1]}, - 'B': {'type': 'Matrix', 'value': [[2], [1]], }, - 'C': {'type': 'Matrix', 'value': [[1, 2]], }, - 'D': {'type': 'Matrix', 'value': [[0]], }, - 'x': ['x1', 'x2'], 'y': ['y'], 'u': ['u'], 'sys': 'str'} - # print(dataSet.is_valid(mat)) - # print(dataSet.is_valid(nonmat)) - # print(dataSet.is_valid(ts)) - # print(dataSet.is_valid(nonts)) - # print(dataSet.is_valid(ss)) - # print(dataSet.is_valid(nonss)) - # print(dataSet.is_valid(dataSet['Matrix'])) - # print(dataSet.is_valid(dataSet['Type'])) - - import variable - matvar = variable.Variable('Matrix', mat) - print(matvar, matvar.__dict__) - print(matvar.dump()) - # noinspection PyBroadException - try: - dataSet.write('mat', matvar.dump()) - except BaseException: - print('error') - else: - print('valid') - - -if __name__ == "__main__": - main() diff --git a/pyminer2/workspace/datamanager/exceptions.py b/pyminer2/workspace/datamanager/exceptions.py index da5e15df24eee9c4aebb7e04f2d09db3e922d053..883fb76827005ea78b967af33ff443e06e65a846 100644 --- a/pyminer2/workspace/datamanager/exceptions.py +++ b/pyminer2/workspace/datamanager/exceptions.py @@ -4,3 +4,15 @@ class ConflictError(Exception): class NotFoundError(Exception): pass + + +class ConverterError(Exception): + 
"""用于在数据类型无法转换时进行报错""" + # TODO (panhaoyu) 建议改成ConvertError + # 由于改动涉及其他模块,需要在合并后的一个绝对安全的情况下进行修改 + pass + + +class WouldBlockError(Exception): + # TODO (panhaoyu) 建议改成DataBlockedError + pass diff --git a/pyminer2/workspace/datamanager/metadataset.py b/pyminer2/workspace/datamanager/metadataset.py index 04b1a6c9ef00232b7071ef28fd767227231681de..91f97915175a87a3858bc6c4a3d60d751ba5ea73 100644 --- a/pyminer2/workspace/datamanager/metadataset.py +++ b/pyminer2/workspace/datamanager/metadataset.py @@ -1,15 +1,13 @@ -import time -import threading import contextlib -from pyminer2.workspace.datamanager.exceptions import ConflictError, NotFoundError - +import threading +import time -class WouldBlockError(Exception): - pass +from pyminer2.workspace.datamanager.exceptions import ConflictError, NotFoundError +from .exceptions import WouldBlockError class MetaData(dict): - + # TODO (panhaoyu) 这里建议采用object加属性的方式进行操作,现在这种方式不支持代码提示 def __init__(self, provider, **info): super().__init__() self['provider'] = provider @@ -21,10 +19,20 @@ class MetaData(dict): class MetaDataSet(dict): + # TODO (panhaoyu) 既然所有的方法名都带有“data“,那么这个“data“可能就是冗余的。 + + # 这两个函数仅仅用于添加代码提示 + def __getitem__(self, item: str) -> MetaData: + return super(MetaDataSet, self).__getitem__(item) + + def __setitem__(self, key: str, value: MetaData): + super(MetaDataSet, self).__setitem__(key, value) def define_data(self, key: str, info: MetaData): if key in self and not self[key]['deleted']: raise ConflictError(f'meta data {key} already exist') + + # TODO (panhaoyu) 数据类型的定义是否应统一放在MataData里面 info['creation_time'] = time.time() info['modification_time'] = [info['creation_time'], ] self[key] = info @@ -61,6 +69,7 @@ class MetaDataSet(dict): @contextlib.contextmanager def lock_data(self, key: str): if key not in self or self[key]['deleted']: + # TODO (panhaoyu) 对于不存在的变量,需要明确报错,而不是继续运行 yield else: lock = self[key]['lock'] diff --git a/pyminer2/workspace/datamanager/recyclebin.py b/pyminer2/workspace/datamanager/recyclebin.py 
index 559e7fee811adb2c6de09046ea880771089ac1d3..c13ddeb453a37c2ef766f312fdcb1d5dadd8626b 100644 --- a/pyminer2/workspace/datamanager/recyclebin.py +++ b/pyminer2/workspace/datamanager/recyclebin.py @@ -2,6 +2,11 @@ from pyminer2.workspace.datamanager.exceptions import NotFoundError class RecycleBin(list): + """ + 回收站,数据类型为(key, value)。 + 使用discard方法将对象移入回收站,再使用restore方法将对象移出回收站。 + """ + def __init__(self, max_size=1000): super().__init__() self.max_size = max_size diff --git a/pyminer2/workspace/datamanager/variable.py b/pyminer2/workspace/datamanager/variable.py index 7f504ac5a9b865fe354cc49db9b96de5682c19dd..a159aa447c996d3f005b6d5f6972fd7827dfeed9 100644 --- a/pyminer2/workspace/datamanager/variable.py +++ b/pyminer2/workspace/datamanager/variable.py @@ -1,7 +1,5 @@ -import json -import dill as pickle -import base64 import copy +import json class VariableError(Exception): @@ -13,6 +11,7 @@ class Variable(dict): members['type'] = vartype self.type = vartype self.update(members) + super(Variable, self).__init__() def load(self, dct: dict): if 'type' not in dct: diff --git a/pyminer2/workspace/datamanager/varset.py b/pyminer2/workspace/datamanager/varset.py index 1d1214889a0e45ffaa4c1c06f151f976dc91cf05..4a3eb5b5bb62a64030103a951e42f51e4c50820d 100644 --- a/pyminer2/workspace/datamanager/varset.py +++ b/pyminer2/workspace/datamanager/varset.py @@ -5,17 +5,29 @@ from pyminer2.workspace.datamanager.exceptions import ConflictError class VarSet(dict): + """ + 这个类是对于字典进行的扩展。 + 主要功能是添加了get_var和set_var两个函数。 + """ def insert_builtin_types(self, builtin_types: dict): + # TODO (panhaoyu) 基于pycharm的索引没能找到调用,是否说明该功能已弃用? 
self.update(builtin_types) - def get_var(self, varname: str): - return self[varname] + def __getitem__(self, item: str): + # TODO (panhaoyu) 这个类需要类型提示 + return super(VarSet, self).__getitem__(item) - def set_var(self, varname: str, variable): - assert re.match(r'^[_a-zA-Z][_a-zA-Z0-9]*$', varname) - if varname in self and isinstance( - self[varname], Variable) and self[varname].type == 'Type': - raise ConflictError(f'{varname} is a builtin type') + def __setitem__(self, key: str, value): + assert isinstance(key, str) + assert re.match(r'^[_a-zA-Z][_a-zA-Z0-9]*$', key) + if key in self and isinstance(self[key], Variable) and self[key].type == 'Type': + raise ConflictError(f'{key} is a builtin type') else: - self[varname] = variable + super(VarSet, self).__setitem__(key, value) + + def get_var(self, key: str): + return self[key] + + def set_var(self, key: str, variable): + self[key] = variable diff --git a/pyminer2/workspace/dataserver/dataserver.py b/pyminer2/workspace/dataserver/dataserver.py index ccb4f6f6702bd25186bc75ece16e9f935bab21a0..e67a103359b71a22480b1776646c2cd90203bda0 100644 --- a/pyminer2/workspace/dataserver/dataserver.py +++ b/pyminer2/workspace/dataserver/dataserver.py @@ -4,7 +4,7 @@ from jsonrpc import JSONRPCResponseManager, Dispatcher from werkzeug.serving import run_simple from werkzeug.wrappers import Request, Response -from pyminer2.workspace.datamanager.converter import ConverterError +from pyminer2.workspace.datamanager.converter import ConverterError from pyminer2.workspace.datamanager.datamanager import DataManager from pyminer2.workspace.datamanager.exceptions import ConflictError, NotFoundError from pyminer2.workspace.datamanager.metadataset import WouldBlockError @@ -31,7 +31,7 @@ class DataServer(Thread): return self._error(e, self.WOULD_BLOCK_ERROR) except NotFoundError as e: return self._error(e, self.NOT_FOUND_ERROR) - except ConverterError as e: + except ConverterError as e: # This mean unsupported type obj is requested # Users 
should regard this as not found error return self._error(e, self.NOT_FOUND_ERROR) diff --git a/requirements.txt b/requirements.txt index b4f15e03962efe28ea0c308ab8b840624f031d14..8321f8f9eba7feeeb70bdb7b56ebe1044de6e0ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,4 +40,4 @@ flake8 configparser vermanager>=0.1.3 pmgwidgets>=0.1.9 - +pytest diff --git a/requirements_linux.txt b/requirements_linux.txt index edddeb2e04ee8c7f18b71fb01d137e0304ff39df..9955a9c9d38fa6794ba365c544f36361f94f90be 100644 --- a/requirements_linux.txt +++ b/requirements_linux.txt @@ -39,3 +39,4 @@ flake8 configparser vermanager>=0.1.3 pmgwidgets>=0.1.9 +pytest diff --git a/requirements_mac.txt b/requirements_mac.txt index 811e327de919c8b7af64d9ffacd8b34b912668ba..9955a9c9d38fa6794ba365c544f36361f94f90be 100644 --- a/requirements_mac.txt +++ b/requirements_mac.txt @@ -39,4 +39,4 @@ flake8 configparser vermanager>=0.1.3 pmgwidgets>=0.1.9 - +pytest diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/test/test_workspace/__init__.py b/test/test_workspace/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/test/test_workspace/test_converter.py b/test/test_workspace/test_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..88c107eb82d336e1b5d1e84026d579ce56af6a15 --- /dev/null +++ b/test/test_workspace/test_converter.py @@ -0,0 +1,59 @@ +import numpy +import pytest + +from pyminer2.workspace.datamanager import converter, datamanager, exceptions +import pandas + + +def test_ndarray_1d(): + cvt = converter.Converter(datamanager.DataManager()) + result = cvt.convert_ndarray(numpy.array([1, 2, 3, 4])) + assert result['type'] == 'Vector' + assert result['value'] == [1, 2, 3, 4] + + +def test_ndarray_2d(): + cvt = converter.Converter(datamanager.DataManager()) + 
result = cvt.convert_ndarray(numpy.array([[1, 2, 3, 4], [5, 6, 7, 8]])) + assert result['type'] == 'Matrix' + assert result['value'] == [[1, 2, 3, 4], [5, 6, 7, 8]] + + +def test_ndarray_3d(): + # TODO (panhaoyu) 三维数组应当添加支持 + cvt = converter.Converter(datamanager.DataManager()) + with pytest.raises(exceptions.ConverterError): + cvt.convert_ndarray(numpy.zeros((3, 3, 3))) + + +def test_ndarray_long(): + # TODO (panhaoyu) 这里应当添加其他类型的支持 + cvt = converter.Converter(datamanager.DataManager()) + with pytest.raises(exceptions.ConverterError): + cvt.convert_ndarray(numpy.zeros((3, 3), dtype=numpy.longlong)) + + +def test_list(): + cvt = converter.Converter(datamanager.DataManager()) + result = cvt.convert_list([1, 2, 3]) + assert result['type'] == 'Vector' + + result = cvt.convert_list([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + assert result['type'] == 'Matrix' + + +def test_dataframe(): + cvt = converter.Converter(datamanager.DataManager()) + df = pandas.DataFrame(data=[[1, 2], [3, 4], [5, 6]], columns=['key', 'value']) + result = cvt.convert_dataframe(df) + assert result['type'] == 'DataFrame' + assert result['table'] == [[1, 2], [3, 4], [5, 6]] + assert result['columns'] == ['key', 'value'] + + +def test_all(): + dm = datamanager.DataManager() + cvt = converter.Converter(dm) + array = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + mat = cvt.convert_to_data(array) + assert mat['type'] == 'Matrix' diff --git a/test/test_workspace/test_data_manager.py b/test/test_workspace/test_data_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..77b54141f75c87405d035e79bf4ab147f2e5aff8 --- /dev/null +++ b/test/test_workspace/test_data_manager.py @@ -0,0 +1,91 @@ +from pyminer2.workspace.datamanager import datamanager, exceptions +import numpy as np + + +def test_variable_get_set(): + dm = datamanager.DataManager() + dm.set_var('myArray', np.array([[1, 2, 3], [4, 5, 6]])) + assert dm.get_var('myArray')[1, 1] == 5 + assert dm.get_var('myArray').shape == (2, 3) + 
+ dm.set_var('myMatrix', { + 'type': 'Matrix', + 'value': [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], + }) + assert dm.get_var('myMatrix')['value'][2][2] == 9 + + +# TODO 由于不理解History相关内容的含义,没有对这部分添加测试用例 + + +def test_all(): + # TODO (panhaoyu) 这里的代码暂时没有进行更新,先这样吧 + dm = datamanager.DataManager() + + def on_modification(varname: str, variable): + print(f'detect modification: {varname} = {variable}') + + def on_deletion(varname: str): + print(f'detect deletion: {varname}') + + dm.on_modification(on_modification) + dm.on_deletion(on_deletion) + + dm.set_var('arr', np.array([[1, 2, 3], [3, 2, 1]])) + print('add arr directly\n', dm.varset, '\n') + + mat = {'type': 'Matrix', 'value': [[1, 2, 3], [3, 2, 1]]} + dm.write_data('mat', mat) + print( + 'add mat from server\n', + dm.varset, + '\n', + dm.dataset, + '\n') + + print('metadataset\n', dm.metadataset, '\n') + + dm.set_var('mat', np.array([[1, 2, 4], [4, 2, 1]]), 'user') + print('modify mat\n', dm.varset, '\n', dm.dataset, '\n') + + print('metadataset\n', dm.metadataset, '\n') + + dm.cancel('mat') + print('cancel mat\n', dm.varset, '\n') + + dm.redo('mat') + print('redo mat\n', dm.varset, '\n') + + dm.delete_data('arr') + print( + 'cancel mat\n', + dm.varset, + '\nrecycle bin', + dm.recyclebin, + '\n') + # noinspection PyBroadException + try: + var = dm.get_var('arr') + except BaseException: + print('cannot get arr\n') + else: + print('get var', var, '\n') + # noinspection PyBroadException + try: + var = dm.read_data('arr') + except BaseException: + print('cannot read arr\n') + else: + print('read var', var, '\n') + + dm.set_var('arr', np.array([[1, 2, 5], [5, 2, 1]])) + dm.restore(0) + print( + 'cancel mat\n', + dm.varset, + '\nrecycle bin', + dm.recyclebin, + '\n') + + print('read arr', dm.read_data('arr')) + print('get mat', dm.get_var('mat')) diff --git a/test/test_workspace/test_dataset.py b/test/test_workspace/test_dataset.py new file mode 100644 index 
0000000000000000000000000000000000000000..0f6a0a7670e38c56f2364736efcc2aab61bda832 --- /dev/null +++ b/test/test_workspace/test_dataset.py @@ -0,0 +1,229 @@ +import pytest + +from pyminer2.workspace.datamanager import dataset, exceptions + + +# 以下测试用例用于测试compare函数是否可以正确进行比较 +def test_compare_builtin(): + dataset.DataSet().compare(123, 'int') + dataset.DataSet().compare('test string', 'str') + dataset.DataSet().compare(123.4, 'float') + dataset.DataSet().compare([1, 2, 3, 4], 'list') + dataset.DataSet().compare({1: 2, 3: 4}, 'dict') + with pytest.raises(AssertionError): + dataset.DataSet().compare(123, 'float') + + +def test_compare_type_not_exists(): + """如果类型不存在,则应报错""" + with pytest.raises(AssertionError): + dataset.DataSet().compare(123, 'nosuchtype') + + +# 以下测试用例用于测试各个数据类型的有效性检测是否正确 + +def test_matrix(): + d = dataset.DataSet() + + # 正常用法 + assert d.is_valid({'type': 'Matrix', 'value': [[1, 2, 3], [4, 5, 6]]}) + + # TODO (panhaoyu) 这里并不是一个合法的矩阵,行列数无法确定,但是却通过了测试 + assert d.is_valid({'type': 'Matrix', 'value': [[1, 2, 3], [4, 5]]}) + + # 一维的矩阵,但是格式有误,不是二维的格式,不应当通过测试 + assert not d.is_valid({'type': 'Matrix', 'value': [1, 2, 3, 3, 2, 1]}) + + # 包含其他类型,不应该通过测试 + assert not d.is_valid({'type': 'Matrix', 'value': [[2, 4, 5], ['23', 5, 6.2]]}) + + +def test_time_series(): + d = dataset.DataSet() + + # 正常用法 + assert d.is_valid({ + 'type': 'TimeSeries', + 'time': [1, 2, 3], + 'data': [3, 2, 1] + }) + + # 找不到data键,不应当通过测试 + assert not d.is_valid({ + 'type': 'TimeSeries', + 'time': [1, 2, 3], + 'mdata': [3, 2, 1] + }) + + # type的大小写错误,不应当通过测试 + assert not d.is_valid({ + 'type': 'timeseries', + 'time': [1, 2, 3], + 'mdata': [3, 2, 1] + }) + + +def test_state_space(): + d = dataset.DataSet() + # 正常用法 + assert d.is_valid({ + 'type': 'StateSpace', + 'A': {'type': 'Matrix', 'value': [[2], [1]]}, + 'B': {'type': 'Matrix', 'value': [[2], [1]]}, + 'C': {'type': 'Matrix', 'value': [[1, 2]]}, + 'D': {'type': 'Matrix', 'value': [[0]]}, + 'x': ['x1', 'x2'], + 'y': ['column'], + 'u': 
['u'], + 'sys': 'str'}) + + # 子结构不符合要求,A应该是Matrix而非TimeSeries + assert not d.is_valid({ + 'type': 'StateSpace', + 'A': {'type': 'TimeSeries', 'time': [1, 2, 3], 'mdata': [3, 2, 1]}, + 'B': {'type': 'Matrix', 'value': [[2], [1]]}, + 'C': {'type': 'Matrix', 'value': [[1, 2]]}, + 'D': {'type': 'Matrix', 'value': [[0]]}, + 'x': ['x1', 'x2'], + 'y': ['y'], + 'u': ['u'], + 'sys': 'str', + }) + + +def test_complex(): + d = dataset.DataSet() + + # TODO (panhaoyu) 正常用法,应注意实部或虚部需要支持float退化为int,目前不支持 + assert not d.is_valid({ + 'type': 'Complex', + 'real': 2, + 'imag': 3.4, + }) + + # 值类型错误,不应通过测试 + assert not d.is_valid({ + 'type': 'Complex', + 'real': 'wrong value', + 'imag': 123.5 + }) + + +def test_vector(): + d = dataset.DataSet() + + # 正常用法 + assert d.is_valid({ + 'type': 'Vector', + 'value': [ + 1, 2, 3, 2.5, 21.34, + {'type': 'Complex', 'real': 123.4, 'imag': 532.1} + ]}) + + # 不支持字符串,不应通过测试 + assert not d.is_valid({ + 'type': 'Vector', + 'value': [1, 2, 3, 'wrong'], + }) + + +def test_data_frame(): + d = dataset.DataSet() + + # 正常用法:每一列的行都是相等的 + assert d.is_valid({ + 'type': 'DataFrame', + 'table': [ + [1, 2, 3, 4], + ['asd', 'gds', 'sda', 'asq'], + [1.2, 5.3, 5, 5.2], + ], + 'columns': ['index', 'name', 'price'], + }) + + # TODO (panhaoyu) 每一列的行不完全相等,不应通过测试 + assert d.is_valid({ + 'type': 'DataFrame', + 'table': [ + [1, 2, 3, 4], + ['asd', 'gds', 'sda', 'asq'], + [1.2, 5.3, 5], + ], + 'columns': ['index', 'name', 'price'], + }) + + # TODO (panhaoyu) 列数与列名数不相等,不应当通过测试 + assert d.is_valid({ + 'type': 'DataFrame', + 'table': [ + [1, 2, 3, 4], + ['asd', 'gds', 'sda', 'asq'], + [1.2, 5.3, 5, 5.2], + ], + 'columns': ['index', 'name'], + }) + + +def test_series(): + d = dataset.DataSet() + + # 正常用法 + assert d.is_valid({ + 'type': 'Series', + 'value': [ + [1.2, 3, 452.1, 6.12, -1235], + [2.135, 5321.5, 6632, 2134, 51.21], + ] + }) + + # TODO (panhaoyu) Series内的数据类型是否需要统一? 
+ + +# 以下内容为对于dataset的功能的测试 + +def test_operations(): + d = dataset.DataSet() + d.write('NewType', { + 'type': 'Type', + 'structure': { + 'value': [['float']] + } + }) + assert d.read('NewType')['structure']['value'][0][0] == 'float' + + d.write('test', { + 'type': 'Matrix', + 'value': [[1, 2, 3], [4, 5, 6], [7.7, 8.8, 9.9]], + }) + assert d.read('test')['value'][1][1] == 5 + + # TODO (panhaoyu) 是否应当支持覆盖?若支持,synchronise的意义是什么? + d.write('test', { + 'type': 'Matrix', + 'value': [[1, 2, 3], [7, 8, 9], [1, 1, 1], [6, 6, 7]], + }) + assert d.read('test')['value'][1][1] == 8 + + d.synchronise('test', { + 'type': 'Matrix', + 'value': [[1, 2, 4], [2, 2, 2], [9, 6, 10]], + }) + assert d.read('test')['value'][1][1] == 2 + + +def test_write_builtin_types(): + d = dataset.DataSet() + with pytest.raises(exceptions.ConflictError): + d.write('Matrix', { + 'type': 'Type', + 'structure': [['float']] + }) + + +def test_write_illegal_name(): + d = dataset.DataSet() + with pytest.raises(AssertionError): + d.write('123abcABC', { + 'type': 'Type', + 'structure': [['float']], + }) diff --git a/test/test_workspace/test_metadata_set.py b/test/test_workspace/test_metadata_set.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc25a1c08933a0c6c52b97fc7fb18f11124fcd0 --- /dev/null +++ b/test/test_workspace/test_metadata_set.py @@ -0,0 +1,35 @@ +from pyminer2.workspace.datamanager import metadataset, exceptions +import pytest +import threading + + +def test_define_modify(): + ms = metadataset.MetaDataSet() + + ms.define_data('testData', metadataset.MetaData('noProvider')) + assert ms['testData']['modified_by'] == ['noProvider'] + + ms.modify_data('testData', 'newProvider') + assert ms['testData']['modified_by'] == ['noProvider', 'newProvider'] + + ms.delete_data('testData') + assert ms['testData']['deleted'] == True + + ms.restore_data('testData') + assert ms['testData']['deleted'] == False + + +def test_lock(): + # TODO (panhaoyu) 多线程玩不转,请补充测试用例 + # ms = 
metadataset.MetaDataSet() + # ms.define_data('testData', metadataset.MetaData('noProvider')) + # with ms.lock_data('testData'): + # def target(): + # # TODO 这里是否应该报错,结果并没有抛出异常 + # # with pytest.raises(exceptions.WouldBlockError): + # ms.modify_data('testData', 'newProvider') + # + # thread = threading.Thread(target=target) + # thread.start() + # thread.join() + pass diff --git a/test/test_workspace/test_recycle_bin.py b/test/test_workspace/test_recycle_bin.py new file mode 100644 index 0000000000000000000000000000000000000000..10bd47f9a2021f58386ce8e13a7e44e11b23dfe6 --- /dev/null +++ b/test/test_workspace/test_recycle_bin.py @@ -0,0 +1,21 @@ +from pyminer2.workspace.datamanager import recyclebin, exceptions + +import numpy + + +def test_all(): + rb = recyclebin.RecycleBin() + rb.discard('testName', numpy.zeros((3, 3))) + assert rb.get_varname(0) == 'testName' + + rb.discard('newName', numpy.ones((4, 4))) + assert rb.get_varname(0) == 'testName' + assert rb.get_varname(1) == 'newName' + + key, value = rb.restore(0) + assert key == 'testName' + assert value.shape == (3, 3) + + key, value = rb.restore(0) + assert key == 'newName' + assert value.shape == (4, 4) diff --git a/test/test_workspace/test_variable.py b/test/test_workspace/test_variable.py new file mode 100644 index 0000000000000000000000000000000000000000..37d3c0d6c75307f020d9e9286b4c757d03da32e3 --- /dev/null +++ b/test/test_workspace/test_variable.py @@ -0,0 +1,11 @@ +from pyminer2.workspace.datamanager import variable, exceptions + + +def test_all(): + v = variable.Variable('Matrix', {}) + assert v.type == 'Matrix' + + # TODO (panhaoyu) 进行 dump, dumps, load, loads 的单元检测 + # 目前由于不太清楚这个类是做什么的,因此没法进行检测 + # 这是在dataset中的一行测试代码,不知道有何用 + # matvar = variable.Variable('Matrix', mat) diff --git a/test/test_workspace/test_varset.py b/test/test_workspace/test_varset.py new file mode 100644 index 0000000000000000000000000000000000000000..13bba87709274b2b3d8def9a91fbcb383be8d802 --- /dev/null +++ 
b/test/test_workspace/test_varset.py @@ -0,0 +1,9 @@ +from pyminer2.workspace.datamanager import varset +from pyminer2.workspace.datamanager import exceptions + +import pytest + + +def test_all(): + vset = varset.VarSet() + # TODO (panhaoyu) 进行单元测试