"""Split one worksheet of an Excel file into several worksheets.

Rows are grouped by the value found in a chosen column; every distinct value
gets its own worksheet in a new workbook saved next to the input file as
``<name>_Split_<timestamp><ext>``.  Both ``.xls`` (xlrd/xlwt) and ``.xlsx``
(openpyxl) files are supported.
"""
import datetime
import os
from typing import Optional

# Characters that Excel does not allow in a worksheet name.
_INVALID_SHEET_CHARS = str.maketrans({ch: '_' for ch in '[]:\\?/*\x00'})
# Excel limits worksheet names to 31 characters.
_MAX_SHEET_NAME_LEN = 31


def _split_output_path(filepath: str) -> str:
    """Return ``<root>_Split_<timestamp><ext>`` for *filepath*."""
    datetime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    # splitext only touches the real extension; a plain str.replace would also
    # rewrite ".xls" inside directory names, or leave the path unchanged (and
    # thus overwrite the input) when the text is not found at all.
    root, ext = os.path.splitext(filepath)
    return '{}_Split_{}{}'.format(root, datetime_str, ext)


def _blank_to_space(value):
    """Replace an empty cell (``None`` or ``''``) with a single space."""
    # Only truly empty cells are replaced: 0, 0.0 and False are real data.
    return ' ' if value is None or value == '' else value


def _group_rows(rows, column: int, width: int) -> dict:
    """Group *rows* by the value in the 1-based *column*.

    Every row is padded to *width* cells so that ``column`` can always be
    indexed.  Returns ``{cell value: [row, ...]}`` in first-seen order.
    """
    groups = {}
    for raw_row in rows:
        cleaned = [_blank_to_space(value) for value in raw_row]
        cleaned.extend([' '] * (width - len(cleaned)))
        groups.setdefault(cleaned[column - 1], []).append(cleaned)
    return groups


def _unique_sheet_name(key, taken: set) -> str:
    """Turn a group key into a valid worksheet name that is not in *taken*.

    *taken* holds the lower-cased names already handed out and is updated in
    place (Excel compares worksheet names case-insensitively).
    """
    if isinstance(key, float) and key.is_integer():
        # Whole-number floats read better as "3" than "3.0".
        key = int(key)
    base = str(key).translate(_INVALID_SHEET_CHARS).strip("'")
    base = base[:_MAX_SHEET_NAME_LEN] or 'Sheet'
    name = base
    counter = 1
    while name.lower() in taken:
        counter += 1
        suffix = '_{}'.format(counter)
        name = base[:_MAX_SHEET_NAME_LEN - len(suffix)] + suffix
    taken.add(name.lower())
    return name


def generate_xls(filepath: str, worksheet_data: dict) -> str:
    """Write *worksheet_data* to a new ``.xls`` workbook next to *filepath*.

    Args:
        filepath: Path of the source workbook; used to derive the output name.
        worksheet_data: Mapping of group key -> list of rows (lists of cell
            values).  Each key becomes one worksheet.

    Returns:
        The path of the workbook that was written.
    """
    # Imported lazily so that only the library for the format actually being
    # processed has to be installed.
    import xlwt

    new_filepath = _split_output_path(filepath)
    new_workbook = xlwt.Workbook(encoding='utf-8')
    taken_names = set()
    for group_key, row_data_list in worksheet_data.items():
        sheet_name = _unique_sheet_name(group_key, taken_names)
        new_worksheet = new_workbook.add_sheet(sheet_name)
        for row_index, row_data in enumerate(row_data_list):
            for column_index, data in enumerate(row_data):
                new_worksheet.write(row_index, column_index, data)
    new_workbook.save(new_filepath)
    return new_filepath


def process_xls(filepath, column: int, worksheet_name: Optional[str] = None) -> str:
    """Split a worksheet of an ``.xls`` file by the values of one column.

    Args:
        filepath: Path of the ``.xls`` workbook to read.
        column: 1-based index of the column whose values define the groups.
        worksheet_name: Worksheet to read; the first worksheet when omitted.

    Returns:
        A human-readable status message (success, unreadable file, or column
        out of range).
    """
    import xlrd

    try:
        workbook = xlrd.open_workbook(filepath, formatting_info=True)
    except Exception:
        return "文件读取异常:{}".format(filepath)
    if worksheet_name:
        worksheet = workbook.sheet_by_name(worksheet_name)
    else:
        worksheet = workbook.sheet_by_index(0)

    cols = worksheet.ncols
    # Same guard as the xlsx path; also rejects column < 1, which would
    # otherwise silently index from the end of the row.
    if not 1 <= column <= cols:
        return "最大列数是{},取不到第{}列".format(cols, column)

    rows = (worksheet.row_values(r) for r in range(worksheet.nrows))
    split_data_dict = _group_rows(rows, column, cols)
    new_filepath = generate_xls(filepath, split_data_dict)
    return "数据保存在新文件中,文件名:{}".format(new_filepath)


def generate_xlsx(filepath: str, worksheet_data: dict) -> str:
    """Write *worksheet_data* to a new ``.xlsx`` workbook next to *filepath*.

    Args:
        filepath: Path of the source workbook; used to derive the output name.
        worksheet_data: Mapping of group key -> list of rows (lists of cell
            values).  Each key becomes one worksheet.

    Returns:
        The path of the workbook that was written.
    """
    import openpyxl

    new_filepath = _split_output_path(filepath)
    new_workbook = openpyxl.Workbook()
    if worksheet_data:
        # Drop the empty default sheet so the output holds only the groups.
        # It is kept when there are no groups because a workbook needs at
        # least one sheet to be saved.
        new_workbook.remove(new_workbook.active)
    taken_names = set()
    for group_key, row_data_list in worksheet_data.items():
        sheet_name = _unique_sheet_name(group_key, taken_names)
        new_worksheet = new_workbook.create_sheet(sheet_name)
        for row_data in row_data_list:
            new_worksheet.append(row_data)
    new_workbook.save(new_filepath)
    return new_filepath


def process_xlsx(filepath: str, column: int, worksheet_name: Optional[str] = None) -> str:
    """Split a worksheet of an ``.xlsx`` file by the values of one column.

    Args:
        filepath: Path of the ``.xlsx`` workbook to read.
        column: 1-based index of the column whose values define the groups.
        worksheet_name: Worksheet to read; the active worksheet when omitted.

    Returns:
        A human-readable status message (success, unreadable file, or column
        out of range).

    Raises:
        KeyError: If *worksheet_name* does not exist in the workbook.
    """
    import openpyxl

    try:
        workbook = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
    except Exception:
        return "文件读取异常:{}".format(filepath)
    try:
        if worksheet_name:
            worksheet = workbook[worksheet_name]
        else:
            worksheet = workbook.active
        rows = [[cell.value for cell in row] for row in worksheet.rows]
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        workbook.close()

    # Measure the width from the data itself: in read-only mode max_column
    # can be None when the file carries no dimension information.
    width = max((len(row) for row in rows), default=0)
    if not 1 <= column <= width:
        return "最大列数是{},取不到第{}列".format(width, column)

    split_data_dict = _group_rows(rows, column, width)
    new_filepath = generate_xlsx(filepath, split_data_dict)
    return "数据保存在新文件中,文件名:{}".format(new_filepath)


def split_excel(filepath: str, column: int, worksheet_name: Optional[str] = None) -> str:
    """Split an Excel file by column value, dispatching on its extension.

    Args:
        filepath: Path of an ``.xls`` or ``.xlsx`` workbook (the extension is
            matched case-insensitively).
        column: 1-based index of the column whose values define the groups.
        worksheet_name: Worksheet to read; the workbook's default when omitted.

    Returns:
        A human-readable status message.
    """
    lowered = filepath.lower()
    # Test ".xlsx" first: it is the more specific suffix.
    if lowered.endswith('.xlsx'):
        return process_xlsx(filepath, column, worksheet_name)
    if lowered.endswith('.xls'):
        return process_xls(filepath, column, worksheet_name)
    return "文件格式不对,不进行处理"


if __name__ == "__main__":
    # Demo: split the default worksheet of the sample workbook by its sixth
    # column.  Pass worksheet_name to pick a specific worksheet, or use
    # 'SEdemo.xlsx' as the filename to exercise the xlsx path.
    filename = 'sedemo.xls'
    result = split_excel(filename, 6)
    print(result)