#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Provenance: recovered from a git format-patch whose newlines were collapsed.
#   commit : dd2cb5ff9bff6fc3f7c46b343a42dbf1dffc9d99
#   author : archer0258 <627776371@qq.com>
#   date   : Mon, 27 Jun 2022 01:05:52 +0000
#   subject: [PATCH] add contributors/archer/fake2excel.
#   target : contributors/archer/fake2excel (new file)

#############################################
# File Name: excel.py
# Mail: 1957875073@qq.com
# Created Time:  2022-4-25 10:17:34
# Description: automation helpers for Excel
#############################################

from operator import attrgetter

from faker import Faker
import pandas as pd
from alive_progress import alive_bar

import numpy as np

# Signed integer types tried from narrowest to widest when downcasting.
_DOWNCAST_INT_TYPES = (np.int8, np.int16, np.int32)


def reduce_pandas_mem_usage(df):
    """Shrink the memory footprint of ``df`` in place and return it.

    * Columns whose dtype name starts with ``int`` are cast to the narrowest
      of int8 / int16 / int32 that can hold both their minimum and maximum
      (bounds inclusive). Columns that fit none of them are left unchanged.
    * ``object`` columns are converted to ``category`` unless the column
      label contains ``'date'``.
    * Every other dtype is left untouched.

    Args:
        df: The DataFrame to compact. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            # Labels are not guaranteed to be strings, so normalise before
            # the substring test instead of raising TypeError.
            if 'date' not in str(col):
                df[col] = df[col].astype('category')
            continue

        if str(col_type)[:3] != 'int':
            continue

        # min/max are only needed (and only guaranteed to work) for integer
        # columns; e.g. unordered categoricals raise on .min().
        c_min = df[col].min()
        c_max = df[col].max()
        for candidate in _DOWNCAST_INT_TYPES:
            limits = np.iinfo(candidate)
            # Inclusive comparison: the boundary values themselves fit.
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    return df


def _normalize_locale(language):
    """Map the convenience alias ``'english'`` (any case) to ``'en_US'``.

    Any other value is returned unchanged and handed to ``Faker`` as is.
    """
    if isinstance(language, str) and language.lower() == 'english':
        return 'en_US'
    return language


def _fake_value(fake, spec, force_call):
    """Produce one value from ``fake`` for the provider described by ``spec``.

    Args:
        fake: The ``Faker`` instance to draw from.
        spec: A provider name such as ``'name'`` or a dotted path such as
            ``'unique.name'``. Anything else is treated as a legacy
            expression suffix (see the note below).
        force_call: When true the resolved attribute is always called; when
            false it is called only if it is callable.

    Returns:
        The generated value.
    """
    spec = str(spec)
    if all(part.isidentifier() for part in spec.split('.')):
        target = attrgetter(spec)(fake)
        if force_call or callable(target):
            return target()
        return target
    # NOTE(security): legacy fallback kept for backward compatibility with
    # specs that embed a call expression, e.g. 'random_int(1, 10)'. eval()
    # executes arbitrary code, so ``spec`` must never come from untrusted
    # input.
    expression = f'fake.{spec}()' if force_call else f'fake.{spec}'
    return eval(expression)


def fake2excel(columns=None, rows=1, language='zh_CN', path='./fake2excel.xlsx'):
    """Generate fake data with Faker and write it to an Excel workbook.

    Original author & date: CoderWanFeng, 2022/5/13 0:12.

    Args:
        columns: List of Faker provider names, one per output column; each
            name is also used as the column header. Defaults to ``['name']``.
        rows: Number of data rows to generate. Defaults to 1.
        language: Faker locale. ``'english'`` (any case) is accepted as an
            alias for ``'en_US'``. Defaults to ``'zh_CN'``.
        path: Destination of the Excel file. Defaults to
            ``'./fake2excel.xlsx'``.

    Returns:
        None. The result is the file written at ``path``.
    """
    if columns is None:
        columns = ['name']

    fake = Faker(_normalize_locale(language))
    excel_dict = {}
    # One progress tick per generated cell.
    with alive_bar(len(columns) * rows) as bar:
        for column in columns:
            values = []
            for _ in range(rows):
                values.append(_fake_value(fake, column, force_call=True))
                bar()
            excel_dict[column] = values

    data = reduce_pandas_mem_usage(pd.DataFrame(excel_dict))
    # The context manager saves and closes the workbook, also when writing
    # fails; ExcelWriter.save() is not available on current pandas releases.
    with pd.ExcelWriter(path) as writer:
        data.to_excel(writer, index=False)


def fake2excel_dateframe(columns, names, rows=1, language='zh_CN'):
    """Generate fake data with Faker and return it as a DataFrame.

    Args:
        columns: Faker provider specs, one per output column. A bare provider
            name (``'name'``) and a call expression (``'name()'``) are both
            accepted.
        names: Column headers, paired positionally with ``columns``. Pairing
            uses ``zip``, so surplus entries on either side are ignored.
        rows: Number of data rows to generate. Defaults to 1.
        language: Faker locale. ``'english'`` (any case) is accepted as an
            alias for ``'en_US'``. Defaults to ``'zh_CN'``.

    Returns:
        A ``pandas.DataFrame`` with one column per ``names`` entry and
        ``rows`` rows.
    """
    fake = Faker(_normalize_locale(language))
    excel_dict = {
        name: [_fake_value(fake, column, force_call=False) for _ in range(rows)]
        for column, name in zip(columns, names)
    }
    return pd.DataFrame(excel_dict)