# 将多个CSV文件按照分类汇总
import csv
import os
# Folder containing the input CSV files, the column used to group rows, and
# the number of rows buffered per category before they are flushed to disk.
folder_path = "/path/to/csv_folder"
category_column_name = "Category"
buffer_size = 1000  # adjust as needed

# Columns included in the output files.
output_column_names = ["Name", "Category", "Value"]


def _append_rows(output_path, rows, fieldnames, encoding=None):
    """Append ``rows`` to ``output_path``, writing a header if the file is new or empty."""
    # Decide about the header before opening: opening in append mode creates
    # the file, so afterwards "missing" and "empty" look the same anyway.
    needs_header = (
        not os.path.isfile(output_path) or os.path.getsize(output_path) == 0
    )
    with open(output_path, "a", newline="", encoding=encoding) as output_file:
        # extrasaction="ignore": input rows may carry columns that are not in
        # ``fieldnames``; the default ("raise") would abort with ValueError.
        writer = csv.DictWriter(
            output_file, fieldnames=fieldnames, extrasaction="ignore"
        )
        if needs_header:
            writer.writeheader()
        writer.writerows(rows)


def split_csv_by_category(
    folder_path,
    category_column_name,
    output_column_names,
    buffer_size=1000,
    encoding=None,
):
    """Group the rows of every CSV file in a folder into one file per category.

    Each ``*.csv`` file in ``folder_path`` is read with ``csv.DictReader``.
    Rows are buffered per value of ``category_column_name`` and appended to
    ``<category>.csv`` inside ``folder_path`` whenever a buffer reaches
    ``buffer_size`` rows; whatever is left is written at the end.

    NOTE: output files are created in the same folder that is scanned for
    input and also end in ``.csv``, so running this a second time on the same
    folder will read the previous outputs as inputs.

    Args:
        folder_path: Directory that holds the input files and receives the
            output files.
        category_column_name: Name of the column whose value selects the
            output file.
        output_column_names: Columns written to the output files, in order.
            Other input columns are dropped.
        buffer_size: Number of rows held in memory per category before they
            are flushed.
        encoding: Text encoding passed to ``open`` for both reading and
            writing; ``None`` keeps the platform default.

    Raises:
        KeyError: If a row does not have ``category_column_name``.
        OSError: If the folder or one of the files cannot be accessed.
    """
    pending_by_category = {}

    # Sorted so the order of rows in the output does not depend on the
    # arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".csv"):
            continue
        input_path = os.path.join(folder_path, filename)
        # newline="" lets the csv module handle line endings itself, which is
        # required for quoted fields that contain embedded newlines.
        with open(input_path, "r", newline="", encoding=encoding) as file:
            for row in csv.DictReader(file):
                category = row[category_column_name]
                pending = pending_by_category.setdefault(category, [])
                pending.append(row)
                if len(pending) >= buffer_size:
                    _append_rows(
                        os.path.join(folder_path, f"{category}.csv"),
                        pending,
                        output_column_names,
                        encoding=encoding,
                    )
                    pending.clear()

    # Write whatever is still buffered for each category.
    for category, pending in pending_by_category.items():
        if pending:
            _append_rows(
                os.path.join(folder_path, f"{category}.csv"),
                pending,
                output_column_names,
                encoding=encoding,
            )


if __name__ == "__main__":
    split_csv_by_category(
        folder_path, category_column_name, output_column_names, buffer_size
    )