初始化

This commit is contained in:
2026-04-22 12:27:49 +08:00
commit 4857cb6e45
73 changed files with 20927 additions and 0 deletions

View File

@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
# Display order of the five texture groups returned by get_prop_level; used as
# the ordered categories when the final table is sorted.
trzd5_order = ["砂质", "砂壤质", "壤质", "黏壤质", "黏质"]
# Names of the twelve detailed texture classes.
# NOTE(review): not referenced anywhere in the visible part of this file.
trzd12_order = ["砂土及壤质砂土", "砂质壤土", "壤土", "粉砂质壤土", "砂质黏壤土", "黏壤土", "粉砂质黏壤土", "砂质黏土", "壤质黏土", "粉砂质黏土", "黏土", "重黏土"]
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify the direction in which a merged cell range extends.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col`` attributes, or a falsy value when the cell is not
            part of any merged range.

    Returns:
        ``'both'`` when the range spans several rows and several columns,
        ``'row'`` when it spans several rows only, ``'column'`` when it spans
        several columns only, and ``None`` for a falsy input or a range that
        covers a single cell.
    """
    if not merged_range:
        return None

    first_row, last_row = merged_range.min_row, merged_range.max_row
    first_col, last_col = merged_range.min_col, merged_range.max_col
    spans_rows = last_row > first_row
    spans_cols = last_col > first_col

    if spans_rows:
        return 'both' if spans_cols else 'row'
    return 'column' if spans_cols else None
# 计算属性等级
# Lookup from ``str(value)`` to the five-class texture group. Numeric codes
# and twelve-class texture names share one table because both are matched on
# their string form.
_TEXTURE_GROUP_BY_KEY = {
    "8": "砂质", "砂土及壤质砂土": "砂质",
    "11": "砂壤质", "砂质壤土": "砂壤质",
    "6": "壤质", "3": "壤质", "粉砂质壤土": "壤质", "壤土": "壤质",
    "1": "黏壤质", "4": "黏壤质", "9": "黏壤质",
    "粉砂质黏壤土": "黏壤质", "黏壤土": "黏壤质", "砂质黏壤土": "黏壤质",
    # Legacy misspelling that earlier versions matched instead of
    # "粉砂质黏壤土"; kept so previously accepted input still maps.
    "粉砂质年壤土": "黏壤质",
    "2": "黏质", "5": "黏质", "7": "黏质", "10": "黏质", "12": "黏质",
    "粉砂质黏土": "黏质", "黏土": "黏质", "壤质黏土": "黏质",
    "砂质黏土": "黏质", "重黏土": "黏质",
}


def get_prop_level(prop_level):
    """Map a twelve-class texture code or name to its five-class group.

    Args:
        prop_level: Texture class, either a grid code (matched on
            ``str(prop_level)``, so ``8`` and ``"8"`` are equivalent) or a
            twelve-class texture name.

    Returns:
        One of "砂质", "砂壤质", "壤质", "黏壤质", "黏质", or "-" when the
        value is missing, is "0", or is not recognised.
    """
    if pd.isna(prop_level) or str(prop_level) == "0":
        return "-"
    return _TEXTURE_GROUP_BY_KEY.get(str(prop_level), "-")
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Return a copy of ``df`` with grade columns added.

    Args:
        df: Sample frame; must contain ``TDLYLX`` when grading takes place.
        level_config: Mapping whose ``"标准等级"`` entry holds the grading
            standards handed to ``common_utils.vectorized_grade_assignment``.
            A falsy value disables grading.
        target_prop: Name of the column in ``df`` to grade.

    Returns:
        A copy of ``df``. When ``level_config`` is truthy and ``target_prop``
        is a column of ``df``, the copy gains ``GRIDCODE`` (the assigned
        grades) and ``YJDL`` (first two characters of ``TDLYLX``).
    """
    graded = df.copy()
    if not level_config or target_prop not in df.columns:
        return graded

    standards = level_config["标准等级"]
    graded["GRIDCODE"] = common_utils.vectorized_grade_assignment(
        df[target_prop].values, standards
    )
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the YJDL assignment is assumed to belong to the graded
    # branch - confirm against the original file.
    graded['YJDL'] = graded['TDLYLX'].str[:2]
    return graded
# --- 3. 数据处理与分析 均值---
def _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq):
    """Rescale ``temp_area`` in place so land-use totals match the targets.

    Args:
        df_map_data: Frame with ``XZQMC``, ``YJDL`` and ``temp_area`` columns.
        target_areas_dict: ``{district: {land-use class: target area}}``.
        xzqmc: District whose targets are used when ``is_by_xzq`` is false.
        is_by_xzq: When true every district in ``target_areas_dict`` is
            adjusted separately; otherwise totals are matched per land-use
            class across the whole frame.
    """
    if is_by_xzq:
        df_map_data['adjusted_area'] = df_map_data['temp_area']
        df_map_data['adjustment_factor'] = 1.0
        # Districts present in the map data but absent from the targets keep
        # their original areas; only a warning is printed.
        missing_districts = [
            district for district in df_map_data['XZQMC'].unique()
            if district not in target_areas_dict
        ]
        if missing_districts:
            print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
        original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
        # The loop variable is deliberately NOT called ``xzqmc`` so the
        # caller-supplied district name is never overwritten.
        for district, landuse_targets in target_areas_dict.items():
            for yjdl, target_area in landuse_targets.items():
                key = (district, yjdl)
                if key in original_totals.index and original_totals[key] > 0:
                    factor = target_area / original_totals[key]
                    mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
                    df_map_data.loc[mask, 'adjustment_factor'] = factor
        return

    original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
    # An unknown district yields no targets instead of failing on ``"".items()``.
    for yjdl, target_area in target_areas_dict.get(xzqmc, {}).items():
        original_total = original_totals.get(yjdl, 0)
        # Skip classes that are absent or have zero mapped area: dividing by
        # them used to raise and abort the remaining adjustments.
        if original_total > 0:
            factor = target_area / original_total
            mask = df_map_data['YJDL'] == yjdl
            df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
            df_map_data.loc[mask, 'adjustment_factor'] = factor


def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-grade sample-count and mapped-area table.

    Args:
        gdb_path: Geodatabase that contains the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the attribute field that is graded.
        df_origin_area: Intersection table with ``XZQMC``, ``YJDL_EJDL``,
            ``GRIDCODE`` and ``temp_area`` columns. A ``YJDL`` column is added
            to this frame in place.
        target_areas_dict: ``{district: {land-use class: target area}}`` used
            to rescale the mapped areas.
        xzqmc: District name used to pick targets when ``is_by_xzq`` is false.
        is_by_xzq: Whether to rescale each district separately.
        prop_config: Grading configuration passed to
            ``process_soil_dataframe``.

    Returns:
        DataFrame with one row per (``属性分级``, ``GRIDCODE``) pair and the
        columns ``样点数``, ``样点数占比``, ``制图面积``, ``制图面积_平差后``
        and ``面积占比``, sorted by ``trzd5_order`` and then ``GRIDCODE``.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # NOTE(review): ``astype(str)`` turns a float NaN into the string
        # 'nan', which is not in the replacement list below - confirm whether
        # such rows are meant to survive the cleaning.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: count samples per grade -------------------------
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(get_prop_level)
    processed_df[field_name] = processed_df[field_name].astype('float')
    df_sample_means = processed_df.groupby(['属性分级','GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Map data: area per grade, rescaled to the target areas ---------
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()
    try:
        _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq)
    except Exception as e:
        # Best effort: a failed adjustment leaves the remaining areas as they are.
        print(f"平差处理失败: {e}")
    # NOTE(review): the column is labelled 亩 but no unit conversion happens
    # here - confirm the unit of ``temp_area`` and of the target areas.
    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级','GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge both statistics on the union of their grade keys ---------
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级','GRIDCODE']],
        df_map_areas[['属性分级','GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级','GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级','GRIDCODE'], how='left')
    # Values outside trzd5_order (for example "-") become NaN categories.
    df_final["属性分级"] = pd.Categorical(df_final['属性分级'], categories=trzd5_order, ordered=True)
    df_final.sort_values(['属性分级','GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """Write the per-grade statistics to a formatted Excel workbook.

    Args:
        df: Table with ``属性分级``, ``GRIDCODE``, ``样点数``, ``样点数占比``,
            ``制图面积_平差后`` and ``面积占比`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Mapping whose ``"标准等级"`` entry is inverted
            (value -> key) to label each ``GRIDCODE`` row.

    An empty ``df`` produces a workbook that only contains a notice.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "行政区酸化程度等级分布及占比"

    # --- a. Styles ----------------------------------------------------------
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header ----------------------------------------------------------
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤三普分类'
    ws.merge_cells('C1:D1'); ws['C1'] = '样点统计'
    ws.merge_cells('E1:F1'); ws['E1'] = '制图统计'
    ws['A2'] = '类别'; ws['B2'] = '名称'
    ws['C2'] = '数量/个'; ws['D2'] = '占比%'
    ws['E2'] = '面积/亩'; ws['F2'] = '占比%'

    level_dict = prop_config['标准等级']
    # Inverted lookup: configured value -> configured key.
    # NOTE(review): rows are looked up by str(GRIDCODE), which presumes the
    # configured values are grid codes stored as strings - confirm.
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. Data rows -------------------------------------------------------
    current_row = 3
    for yl, group_yl_df in df.groupby('属性分级', sort=False, observed=False):
        # With observed=False an unused category yields an empty group;
        # merging it would request end_row < start_row, so skip it.
        if group_yl_df.empty:
            continue
        yl_start_row = current_row
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = upper_ranges.get(str(row_data['GRIDCODE']), '-')
            sample_val = row_data.get('样点数', 0)
            sample_pct_val = row_data.get('样点数占比', 0)
            area_val = row_data.get('制图面积_平差后', 0)
            area_pct_val = row_data.get('面积占比', 0)
            # NaN (no match in the left merge) compares false and prints "-".
            ws.cell(row=current_row, column=3).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=4).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=5).value = f"{area_val:.0f}" if area_val > 0 else "-"
            ws.cell(row=current_row, column=6).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"
            current_row += 1
        # Merge the group label only when the group spans several rows.
        last_row = current_row - 1
        if last_row > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=last_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- Totals row ---------------------------------------------------------
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = df['样点数'].sum()
    ws.cell(row=current_row, column=4).value = '100'
    ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=6).value = '100'

    # --- d. Styles and column widths ---------------------------------------
    apply_style(f'A1:F{current_row}', cell_font, center_align, thin_border)
    apply_style('A1:F2', header_font)
    print("正在自动调整列宽...")
    cjk_char = re.compile('([\u4e00-\u9fa5])')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            # Horizontally merged captions must not widen their first column.
            if get_merge_type(merged_range) == 'column':
                continue
            text = str(cell.value)
            # CJK characters are weighted 1.7x when estimating the width.
            cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ------------------------------------------------------------
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Run the grade-distribution report for one soil property.

    Intersects the land-use polygons with the reclassified grade polygons,
    tabulates the result against the "行政区划" feature class of the
    workspace, and writes ``<soil_prop_name>土壤分级分布.xlsx`` into
    ``output_path``.

    Args:
        gdb_path: Geodatabase used as the ArcPy workspace.
        soil_prop_name: Sample feature class / field name of the property.
        reclassed_features_path: Polygon features carrying the grade code.
        dltb_features: Land-use polygon features.
        output_path: Directory that receives the Excel report.
        target_area_dict: ``{district: {land-use class: target area}}``.
        xzqmc: Name of the district being processed.
        prop_config: Grading configuration of the property.

    Errors are printed with a traceback and not re-raised.
    """
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")
        temp_out_feature_class = r"in_memory/temp_out_feature_class"
        temp_out_tables_area = r"in_memory/temp_out_tables_area"
        # Register both in-memory datasets before they are created so they
        # are handed to the cleanup even when a later step fails. The
        # intersect output used to be left out.
        # NOTE(review): assumes clean_up_temp_files tolerates entries that
        # were never created - confirm.
        temp_files.append(temp_out_feature_class)
        temp_files.append(temp_out_tables_area)
        # Intersect land-use polygons with the reclassified grade polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersection per administrative district.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)
        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,360 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Report how a merged cell range extends.

    Args:
        merged_range: Object with ``min_row``, ``max_row``, ``min_col`` and
            ``max_col`` attributes, or a falsy value for an unmerged cell.

    Returns:
        ``'both'`` for a range covering several rows and columns, ``'row'``
        for several rows in one column, ``'column'`` for several columns in
        one row, otherwise ``None``.
    """
    if not merged_range:
        return None

    top, bottom = merged_range.min_row, merged_range.max_row
    left, right = merged_range.min_col, merged_range.max_col
    multi_row = bottom > top
    multi_col = right > left

    if multi_row and multi_col:
        return 'both'
    if multi_row:
        return 'row'
    if multi_col:
        return 'column'
    return None
# 计算属性等级
# Grid code -> texture group name.
_TEXTURE_GROUP_BY_CODE = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}
# Texture group names that are accepted as-is.
_TEXTURE_GROUP_NAMES = frozenset(_TEXTURE_GROUP_BY_CODE.values())


def get_prop_level(prop_level):
    """Return the texture group for a grid code or a group name.

    Args:
        prop_level: Grid code (anything ``int()`` accepts, e.g. ``5`` or
            ``"5"``) or one of the five texture group names.

    Returns:
        "砂质", "砂壤质", "壤质", "黏壤质" or "黏质"; "-" when the value is
        missing, zero or not recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # int() used to run before the name comparison, so every name string
    # raised ValueError; convert defensively and fall back to the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code in _TEXTURE_GROUP_BY_CODE:
        return _TEXTURE_GROUP_BY_CODE[code]
    if isinstance(prop_level, str) and prop_level in _TEXTURE_GROUP_NAMES:
        return prop_level
    return "-"
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Copy ``df`` and attach grade columns when grading is possible.

    Args:
        df: Sample frame; needs a ``TDLYLX`` column when grading takes place.
        level_config: Mapping with a ``"标准等级"`` entry that is forwarded to
            ``common_utils.vectorized_grade_assignment``; falsy to skip.
        target_prop: Column of ``df`` whose values are graded.

    Returns:
        A copy of ``df``; with ``GRIDCODE`` and ``YJDL`` (first two characters
        of ``TDLYLX``) added when ``level_config`` is truthy and
        ``target_prop`` is one of the columns.
    """
    graded = df.copy()
    can_grade = bool(level_config) and target_prop in df.columns
    if can_grade:
        standards = level_config["标准等级"]
        graded["GRIDCODE"] = common_utils.vectorized_grade_assignment(
            df[target_prop].values, standards
        )
        # NOTE(review): indentation was lost in the recovered source; this
        # assignment is assumed to sit inside the graded branch - confirm.
        graded['YJDL'] = graded['TDLYLX'].str[:2]
    return graded
# --- 3. 数据处理与分析 均值---
def _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq):
    """Rescale ``temp_area`` in place so land-use totals match the targets.

    Args:
        df_map_data: Frame with ``XZQMC``, ``YJDL`` and ``temp_area`` columns.
        target_areas_dict: ``{district: {land-use class: target area}}``.
        xzqmc: District whose targets are used when ``is_by_xzq`` is false.
        is_by_xzq: When true every district in ``target_areas_dict`` is
            adjusted separately; otherwise totals are matched per land-use
            class across the whole frame.
    """
    if is_by_xzq:
        df_map_data['adjusted_area'] = df_map_data['temp_area']
        df_map_data['adjustment_factor'] = 1.0
        # Districts without targets keep their areas; only a warning is printed.
        missing_districts = [
            district for district in df_map_data['XZQMC'].unique()
            if district not in target_areas_dict
        ]
        if missing_districts:
            print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
        original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
        # The loop variable must not reuse the name ``xzqmc``.
        for district, landuse_targets in target_areas_dict.items():
            for yjdl, target_area in landuse_targets.items():
                key = (district, yjdl)
                if key in original_totals.index and original_totals[key] > 0:
                    factor = target_area / original_totals[key]
                    mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
                    df_map_data.loc[mask, 'adjustment_factor'] = factor
        return

    original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
    # An unknown district yields no targets instead of failing on ``"".items()``.
    for yjdl, target_area in target_areas_dict.get(xzqmc, {}).items():
        original_total = original_totals.get(yjdl, 0)
        # Classes that are absent or have zero mapped area cannot be scaled;
        # they used to raise and abort the remaining adjustments.
        if original_total > 0:
            factor = target_area / original_total
            mask = df_map_data['YJDL'] == yjdl
            df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
            df_map_data.loc[mask, 'adjustment_factor'] = factor


def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-class sample-count and mapped-area table.

    Args:
        gdb_path: Geodatabase that contains the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the attribute field whose values become the class.
        df_origin_area: Intersection table with ``XZQMC``, ``YJDL_EJDL``,
            ``GRIDCODE`` and ``temp_area`` columns. A ``YJDL`` column is added
            to this frame in place.
        target_areas_dict: ``{district: {land-use class: target area}}`` used
            to rescale the mapped areas.
        xzqmc: District name used to pick targets when ``is_by_xzq`` is false.
        is_by_xzq: Whether to rescale each district separately.
        prop_config: Accepted for signature compatibility; not read here.

    Returns:
        DataFrame with one row per ``属性分级`` value and the columns
        ``样点数``, ``样点数占比``, ``制图面积``, ``制图面积_平差后`` and
        ``面积占比``, sorted by ``属性分级``.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # NOTE(review): ``astype(str)`` turns a float NaN into the string
        # 'nan', which is not in the replacement list below - confirm whether
        # such rows are meant to survive the cleaning.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: the field value itself is the class --------------
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = df_samples.copy()
    processed_df['属性分级'] = processed_df[field_name]
    df_sample_means = processed_df.groupby(['属性分级']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Map data: area per class, rescaled to the target areas ---------
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()
    try:
        _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq)
    except Exception as e:
        # Best effort: a failed adjustment leaves the remaining areas as they are.
        print(f"平差处理失败: {e}")
    # NOTE(review): the column is labelled 亩 but no unit conversion happens
    # here - confirm the unit of ``temp_area`` and of the target areas.
    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge both statistics on the union of their class names --------
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级']],
        df_map_areas[['属性分级']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级'], how='left')
    df_final.sort_values(['属性分级'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """Render the texture-class statistics as a formatted Excel sheet.

    Args:
        df: Table with ``属性分级``, ``样点数``, ``样点数占比``,
            ``制图面积_平差后`` and ``面积占比`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Mapping that must provide a ``"标准等级"`` mapping.

    An empty ``df`` produces a workbook that only contains a notice.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        empty_wb = Workbook()
        notice_ws = empty_wb.create_sheet("Mysheet", 0)
        notice_ws['A1'] = "没有有效的统计数据。"
        empty_wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "土壤质地分类分布"

    # --- a. Styles ----------------------------------------------------------
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for cells in ws[cell_range]:
            for styled in cells:
                styled.font = font
                if alignment:
                    styled.alignment = alignment
                if border:
                    styled.border = border

    # --- b. Header ----------------------------------------------------------
    for merged_ref, anchor, caption in (
        ('A1:A2', 'A1', '土壤质地类别'),
        ('B1:C1', 'B1', '样点统计'),
        ('D1:E1', 'D1', '制图统计'),
    ):
        ws.merge_cells(merged_ref)
        ws[anchor] = caption
    for ref, caption in (
        ('B2', '数量/个'), ('C2', '占比%'),
        ('D2', '面积/亩'), ('E2', '占比%'),
    ):
        ws[ref] = caption

    # The inverted mapping is not used below; the lookup is kept so that a
    # configuration without "标准等级" still fails at this point.
    level_dict = prop_config['标准等级']
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. Data rows -------------------------------------------------------
    current_row = 3
    for _, record in df.iterrows():
        label = record.get('属性分级', "")
        n_samples = record.get('样点数', 0)
        samples_pct = record.get('样点数占比', 0)
        area = record.get('制图面积_平差后', 0)
        area_pct = record.get('面积占比', 0)
        # NaN compares false against 0 and is therefore printed as "-".
        rendered = (
            f"{label}" if label else "-",
            f"{n_samples:.0f}" if n_samples > 0 else "-",
            f"{samples_pct:.1f}" if n_samples > 0 else "-",
            f"{area:.0f}" if area > 0 else "-",
            f"{area_pct:.1f}" if area > 0 else "-",
        )
        for col_idx, text in enumerate(rendered, start=1):
            ws.cell(row=current_row, column=col_idx).value = text
        current_row += 1

    # --- Totals row ---------------------------------------------------------
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=2).value = df['样点数'].sum()
    ws.cell(row=current_row, column=3).value = '100'
    ws.cell(row=current_row, column=4).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=5).value = '100'

    # --- d. Styles and column widths ---------------------------------------
    if current_row > 1:
        apply_style(f'A1:E{current_row}', cell_font, center_align, thin_border)
        apply_style('A1:E2', header_font)
    print("正在自动调整列宽...")
    dims = {}
    for sheet_row in ws.rows:
        for cell in sheet_row:
            if not cell.value:
                continue
            containing = None
            for candidate in ws.merged_cells.ranges:
                if cell.coordinate in candidate:
                    containing = candidate
                    break
            # Horizontally merged captions must not widen their first column.
            if get_merge_type(containing) == 'column':
                continue
            text = str(cell.value)
            # CJK characters are weighted 1.7x when estimating the width.
            cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ------------------------------------------------------------
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Run the texture-class distribution report for one soil property.

    Intersects the land-use polygons with the reclassified class polygons,
    tabulates the result against the "行政区划" feature class of the
    workspace, and writes ``<soil_prop_name>土壤分级分布.xlsx`` into
    ``output_path``.

    Args:
        gdb_path: Geodatabase used as the ArcPy workspace.
        soil_prop_name: Sample feature class / field name of the property.
        reclassed_features_path: Polygon features carrying the class code.
        dltb_features: Land-use polygon features.
        output_path: Directory that receives the Excel report.
        target_area_dict: ``{district: {land-use class: target area}}``.
        xzqmc: Name of the district being processed.
        prop_config: Configuration of the property.

    Errors are printed with a traceback and not re-raised.
    """
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")
        temp_out_feature_class = r"in_memory/temp_out_feature_class"
        temp_out_tables_area = r"in_memory/temp_out_tables_area"
        # Register both in-memory datasets before they are created so they
        # are handed to the cleanup even when a later step fails. The
        # intersect output used to be left out.
        # NOTE(review): assumes clean_up_temp_files tolerates entries that
        # were never created - confirm.
        temp_files.append(temp_out_feature_class)
        temp_files.append(temp_out_tables_area)
        # Intersect land-use polygons with the reclassified class polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersection per administrative district.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)
        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,513 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
# --- 2. 辅助函数 ---
# District names for which TRRZ is graded with get_prop_level_for_xn_TRRZ.
# NOTE(review): "xn" presumably abbreviates a region name - confirm.
xn_region = ['天峨县', '寻甸县', '罗平县', '丘北县', '永仁县', '南华县', '双柏县', '武定县', '祥云县', '楚雄彝族自治州']
# District names for which TRRZ is graded with get_prop_level_for_hn_TRRZ;
# this list is checked before xn_region.
hn_region = ['北海市', '海城区', '银海区', '铁山港区', '港南区', '容县', '平南县', '兴宁区', '武鸣区', '邕宁区', '苍梧县', '靖西市', '西畴县', '马关县', '澜沧县', '双江县', '永德县']
# 计算属性等级
# Grid code -> report grade; codes 6-10 repeat the grades of codes 1-5.
_GRADE_BY_CODE = {
    1: "Ⅰ级", 6: "Ⅰ级",
    2: "Ⅱ级", 7: "Ⅱ级",
    3: "Ⅲ级", 8: "Ⅲ级",
    4: "Ⅳ级", 9: "Ⅳ级",
    5: "Ⅴ级", 10: "Ⅴ级",
}
# Level name -> report grade.
_GRADE_BY_NAME = {
    '等级一': "Ⅰ级",
    '等级二': "Ⅱ级",
    '等级三': "Ⅲ级",
    '等级四': "Ⅳ级",
    '等级五': "Ⅴ级",
}


def get_prop_level(prop_level):
    """Return the report grade for a grid code or a level name.

    Args:
        prop_level: Grid code (anything ``int()`` accepts) or a level name
            such as ``'等级一'``.

    Returns:
        "Ⅰ级" to "Ⅴ级", or "-" when the value is missing, zero or not
        recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # int() used to run before the name comparison, so every level name
    # raised ValueError; convert defensively and fall back to the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code in _GRADE_BY_CODE:
        return _GRADE_BY_CODE[code]
    if isinstance(prop_level, str):
        return _GRADE_BY_NAME.get(prop_level, "-")
    return "-"
# Grid code -> report grade for pH: code 5 is the best grade and the grade
# falls off symmetrically towards codes 1 and 9.
_PH_GRADE_BY_CODE = {
    5: "Ⅰ级",
    4: "Ⅱ级", 6: "Ⅱ级",
    3: "Ⅲ级", 7: "Ⅲ级",
    2: "Ⅳ级", 8: "Ⅳ级",
    1: "Ⅴ级", 9: "Ⅴ级",
}
# Level name -> report grade for pH.
_PH_GRADE_BY_NAME = {
    "等级五": "Ⅰ级",
    "等级四": "Ⅱ级", "等级六": "Ⅱ级",
    "等级三": "Ⅲ级", "等级七": "Ⅲ级",
    "等级二": "Ⅳ级", "等级八": "Ⅳ级",
    "等级一": "Ⅴ级", "等级九": "Ⅴ级",
}


def get_prop_level_for_pH(prop_level):
    """Return the report grade of a pH grid code or level name.

    Args:
        prop_level: Grid code (anything ``int()`` accepts) or a level name
            such as ``"等级五"``.

    Returns:
        "Ⅰ级" to "Ⅴ级", or "-" when the value is missing, zero or not
        recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # int() used to run before the name comparison, so every level name
    # raised ValueError; convert defensively and fall back to the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code in _PH_GRADE_BY_CODE:
        return _PH_GRADE_BY_CODE[code]
    if isinstance(prop_level, str):
        return _PH_GRADE_BY_NAME.get(prop_level, "-")
    return "-"
# Grid code -> report grade for TRRZ in the districts listed in hn_region.
_HN_TRRZ_GRADE_BY_CODE = {
    3: "Ⅰ级",
    4: "Ⅱ级",
    2: "Ⅲ级", 5: "Ⅲ级",
    6: "Ⅳ级",
    1: "Ⅴ级", 7: "Ⅴ级",
}
# Level name -> report grade for the same scheme.
_HN_TRRZ_GRADE_BY_NAME = {
    "等级三": "Ⅰ级",
    "等级四": "Ⅱ级",
    "等级二": "Ⅲ级", "等级五": "Ⅲ级",
    "等级六": "Ⅳ级",
    "等级一": "Ⅴ级", "等级七": "Ⅴ级",
}


def get_prop_level_for_hn_TRRZ(prop_level):
    """Return the report grade of a TRRZ grid code or level name (hn scheme).

    Args:
        prop_level: Grid code (anything ``int()`` accepts) or a level name
            such as ``"等级三"``.

    Returns:
        "Ⅰ级" to "Ⅴ级", or "-" when the value is missing, zero or not
        recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # int() used to run before the name comparison, so every level name
    # raised ValueError; convert defensively and fall back to the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code in _HN_TRRZ_GRADE_BY_CODE:
        return _HN_TRRZ_GRADE_BY_CODE[code]
    if isinstance(prop_level, str):
        return _HN_TRRZ_GRADE_BY_NAME.get(prop_level, "-")
    return "-"
# Grid code -> report grade for TRRZ in the districts listed in xn_region.
_XN_TRRZ_GRADE_BY_CODE = {
    4: "Ⅰ级",
    3: "Ⅱ级", 5: "Ⅱ级",
    6: "Ⅲ级",
    2: "Ⅳ级", 7: "Ⅳ级",
    1: "Ⅴ级", 8: "Ⅴ级",
}
# Level name -> report grade for the same scheme.
_XN_TRRZ_GRADE_BY_NAME = {
    "等级四": "Ⅰ级",
    "等级三": "Ⅱ级", "等级五": "Ⅱ级",
    "等级六": "Ⅲ级",
    "等级二": "Ⅳ级", "等级七": "Ⅳ级",
    "等级一": "Ⅴ级", "等级八": "Ⅴ级",
}


def get_prop_level_for_xn_TRRZ(prop_level):
    """Return the report grade of a TRRZ grid code or level name (xn scheme).

    Args:
        prop_level: Grid code (anything ``int()`` accepts) or a level name
            such as ``"等级四"``.

    Returns:
        "Ⅰ级" to "Ⅴ级", or "-" when the value is missing, zero or not
        recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # int() used to run before the name comparison, so every level name
    # raised ValueError; convert defensively and fall back to the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code in _XN_TRRZ_GRADE_BY_CODE:
        return _XN_TRRZ_GRADE_BY_CODE[code]
    if isinstance(prop_level, str):
        return _XN_TRRZ_GRADE_BY_NAME.get(prop_level, "-")
    return "-"
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Produce a graded copy of the sample frame.

    Args:
        df: Sample frame; needs a ``TDLYLX`` column when grading takes place.
        level_config: Mapping with a ``"标准等级"`` entry that is forwarded to
            ``common_utils.vectorized_grade_assignment``; falsy to skip.
        target_prop: Column of ``df`` whose values are graded.

    Returns:
        A copy of ``df``. If ``level_config`` is truthy and ``target_prop`` is
        a column, the copy also carries ``GRIDCODE`` (assigned grades) and
        ``YJDL`` (first two characters of ``TDLYLX``).
    """
    output = df.copy()
    if level_config and target_prop in df.columns:
        values_to_grade = df[target_prop].values
        output["GRIDCODE"] = common_utils.vectorized_grade_assignment(
            values_to_grade, level_config["标准等级"]
        )
        # NOTE(review): indentation was lost in the recovered source; this
        # assignment is assumed to sit inside the graded branch - confirm.
        output['YJDL'] = output['TDLYLX'].str[:2]
    return output
# --- 3. 数据处理与分析 均值---
def _grade_mapper_for(soil_prop_feature_name, xzqmc):
    """Pick the grade-mapping function for a property and district."""
    if soil_prop_feature_name == 'PH':
        return get_prop_level_for_pH
    if soil_prop_feature_name == 'TRRZ':
        # hn_region is checked first, matching the order of the former chain.
        if xzqmc in hn_region:
            return get_prop_level_for_hn_TRRZ
        if xzqmc in xn_region:
            return get_prop_level_for_xn_TRRZ
    return get_prop_level


def _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq):
    """Rescale ``temp_area`` in place so land-use totals match the targets.

    Args:
        df_map_data: Frame with ``XZQMC``, ``YJDL`` and ``temp_area`` columns.
        target_areas_dict: ``{district: {land-use class: target area}}``.
        xzqmc: District whose targets are used when ``is_by_xzq`` is false.
        is_by_xzq: When true every district in ``target_areas_dict`` is
            adjusted separately; otherwise totals are matched per land-use
            class across the whole frame.
    """
    if is_by_xzq:
        df_map_data['adjusted_area'] = df_map_data['temp_area']
        df_map_data['adjustment_factor'] = 1.0
        # Districts without targets keep their areas; only a warning is printed.
        missing_districts = [
            district for district in df_map_data['XZQMC'].unique()
            if district not in target_areas_dict
        ]
        if missing_districts:
            print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
        original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
        # The loop variable must not reuse the name ``xzqmc``: the caller's
        # district is still needed afterwards to pick the grading scheme.
        for district, landuse_targets in target_areas_dict.items():
            for yjdl, target_area in landuse_targets.items():
                key = (district, yjdl)
                if key in original_totals.index and original_totals[key] > 0:
                    factor = target_area / original_totals[key]
                    mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
                    df_map_data.loc[mask, 'adjustment_factor'] = factor
        return

    original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
    # An unknown district yields no targets instead of failing on ``"".items()``.
    for yjdl, target_area in target_areas_dict.get(xzqmc, {}).items():
        original_total = original_totals.get(yjdl, 0)
        # Classes that are absent or have zero mapped area cannot be scaled;
        # they used to raise and abort the remaining adjustments.
        if original_total > 0:
            factor = target_area / original_total
            mask = df_map_data['YJDL'] == yjdl
            df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * factor
            df_map_data.loc[mask, 'adjustment_factor'] = factor


def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-grade sample-count and mapped-area table.

    Args:
        gdb_path: Geodatabase that contains the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the attribute field that is graded. It also
            selects the grading scheme and the land-use filter.
        df_origin_area: Intersection table with ``XZQMC``, ``YJDL_EJDL``,
            ``GRIDCODE`` and ``temp_area`` columns. A ``YJDL`` column is added
            to this frame in place.
        target_areas_dict: ``{district: {land-use class: target area}}`` used
            to rescale the mapped areas.
        xzqmc: District name; picks the targets when ``is_by_xzq`` is false
            and the regional TRRZ grading scheme.
        is_by_xzq: Whether to rescale each district separately.
        prop_config: Grading configuration passed to
            ``process_soil_dataframe``.

    Returns:
        Tuple ``(df_final, stat_sample)``. ``df_final`` has one row per
        (``属性分级``, ``GRIDCODE``) pair with ``样点数``, ``样点数占比``,
        ``制图面积``, ``制图面积_平差后`` and ``面积占比``, sorted by
        ``属性分级``. ``stat_sample`` holds ``min``, ``max``, ``mean`` and
        ``median`` of the sample values.
    """
    print("开始处理数据...")
    to_grade = _grade_mapper_for(soil_prop_feature_name, xzqmc)

    def clean_df(df, columns):
        # NOTE(review): ``astype(str)`` turns a float NaN into the string
        # 'nan', which is not in the replacement list below - confirm whether
        # such rows are meant to survive the cleaning.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: grade counts and summary statistics -------------
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(to_grade)
    processed_df[field_name] = processed_df[field_name].astype('float')
    stat_sample = {
        'min': processed_df[field_name].min(),
        'max': processed_df[field_name].max(),
        'mean': processed_df[field_name].mean(),
        'median': processed_df[field_name].median(),
    }
    df_sample_means = processed_df.groupby(['属性分级','GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Map data: area per grade, rescaled to the target areas ---------
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    # These properties are reported for cultivated land and orchards only.
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    if soil_prop_feature_name in filtered_props:
        farmland_yjdl = ['耕地', '园地']
        df_origin_area = df_origin_area[df_origin_area['YJDL'].isin(farmland_yjdl)]
        print(f"过滤制图数据仅统计耕地和园地YJDL in {farmland_yjdl}")
    # GZCHD is reported for cultivated land only.
    if soil_prop_feature_name in ['GZCHD']:
        df_origin_area = df_origin_area[df_origin_area['YJDL'] == '耕地']
        print("过滤制图数据GZCHD仅统计耕地")
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()
    try:
        _adjust_map_areas(df_map_data, target_areas_dict, xzqmc, is_by_xzq)
    except Exception as e:
        # Best effort: a failed adjustment leaves the remaining areas as they are.
        print(f"平差处理失败: {e}")
    # NOTE(review): the column is labelled 亩 but no unit conversion happens
    # here - confirm the unit of ``temp_area`` and of the target areas.
    df_map_data['面积_亩'] = df_map_data['temp_area']
    # Same mapper as for the samples, chosen from the caller's district.
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(to_grade)
    df_map_areas = df_map_data.groupby(['属性分级','GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge both statistics on the union of their grade keys ---------
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级','GRIDCODE']],
        df_map_areas[['属性分级','GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级','GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级','GRIDCODE'], how='left')
    df_final.sort_values(['属性分级'], inplace=True)
    print("数据处理流程完成!")
    return df_final, stat_sample
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config, soil_prop_tif, stat_sample):
    """Write the grade-distribution statistics to a formatted Excel workbook.

    The sheet has two header rows, one data row per record of ``df`` (records
    sharing a ``属性分级`` value get one vertically merged label in column A),
    a region-wide total row and three summary rows (mean, median, range)
    filled from ``stat_sample``.

    Args:
        df: Statistics table. ``属性分级``, ``GRIDCODE``, ``样点数`` and
            ``制图面积_平差后`` are required; ``样点数占比`` and ``面积占比``
            are read when present.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Mapping that must contain ``标准等级`` (grade name ->
            comma separated range text) and may contain the multi-line
            strings ``项目分级`` and ``分级标准``.
        soil_prop_tif: Not used by this function; kept so that existing
            callers keep working.
        stat_sample: Mapping with numeric ``mean``, ``median``, ``min`` and
            ``max`` entries.

    Returns:
        None. When ``df`` is empty a placeholder workbook is saved instead.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    # Region-wide summary figures come from the sample statistics.
    stats = stat_sample
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "行政区酸化程度等级分布及占比"

    # Number of decimals used for the summary rows, chosen from the first
    # line of the '项目分级' text.
    one_decimal_props = ('耕作层厚度', '阳离子', '有机质', 'pH', '有效磷', '速效钾',
                         '交换性钙', '交换性镁', '有效硫', '有效铁', '有效锰', '有效硅')
    zero_decimal_props = ('砂粒含量', '粉粒含量', '黏粒含量', '有效土层厚度')
    prop_name_str = prop_config.get('项目分级', '')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in one_decimal_props:
            number_format = '1f'
        elif split_name in zero_decimal_props:
            number_format = '0f'
        else:
            number_format = '2f'
    else:
        number_format = '1f'

    # The unit is the second line of '分级标准'; a single-line value simply
    # has no unit (previously this raised IndexError).
    prop_unit = ''
    prop_unit_str = prop_config.get('分级标准', '')
    if prop_unit_str:
        unit_lines = prop_unit_str.split('\n')
        if len(unit_lines) > 1:
            prop_unit = unit_lines[1].strip()

    # --- header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤三普分级'
    ws.merge_cells('C1:D1')
    ws['C1'] = '样点统计'
    ws.merge_cells('E1:F1')
    ws['E1'] = '制图统计'
    ws['A2'] = '分级'
    ws['B2'] = '值域/' + prop_unit if prop_unit else '值域'
    ws['C2'] = '数量/个'
    ws['D2'] = '占比%'
    ws['E2'] = '面积/亩'
    ws['F2'] = '占比%'

    # GRIDCODE -> range text. The i-th grade (1-based, in config order) owns
    # code i for its first range and code i + <number of grades> for an
    # optional second range.
    level_dict = prop_config['标准等级']
    level_count = len(level_dict)
    range_by_code = {}
    for position, range_text in enumerate(level_dict.values(), 1):
        parts = [part.strip() for part in range_text.split(',')]
        range_by_code[position] = parts[0]
        if len(parts) >= 2:
            range_by_code[position + level_count] = parts[1]

    # --- data rows ---
    current_row = 3
    for grade, grade_rows in df.groupby('属性分级', sort=False, observed=False):
        if grade_rows.empty:
            # An unobserved category would otherwise yield an inverted merge range.
            continue
        grade_start_row = current_row
        for _, record in grade_rows.iterrows():
            ws.cell(row=current_row, column=2).value = range_by_code.get(record['GRIDCODE'], '-')
            sample_val = record.get('样点数', 0)
            sample_pct_val = record.get('样点数占比', 0)
            area_val = record.get('制图面积_平差后', 0)
            area_pct_val = record.get('面积占比', 0)
            # NaN compares False with "> 0", so missing figures render as "-".
            ws.cell(row=current_row, column=3).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=4).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=5).value = f"{area_val:.0f}" if area_val > 0 else "-"
            ws.cell(row=current_row, column=6).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"
            current_row += 1
        # One merged label cell per grade.
        ws.merge_cells(start_row=grade_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=grade_start_row, column=1).value = grade

    # --- region-wide total row ---
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=2).value = '-'
    ws.cell(row=current_row, column=3).value = df['样点数'].sum()
    ws.cell(row=current_row, column=4).value = '100'
    ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=6).value = '100'

    # --- summary rows: mean, median, range ---
    ws.merge_cells(f'B{current_row + 1}:F{current_row + 1}')
    ws.cell(row=current_row + 1, column=1).value = '全区均值'
    ws.cell(row=current_row + 1, column=2).value = f'{stats["mean"]:.{number_format}}'
    ws.merge_cells(f'B{current_row + 2}:F{current_row + 2}')
    ws.cell(row=current_row + 2, column=1).value = '全区中位值'
    ws.cell(row=current_row + 2, column=2).value = f'{stats["median"]:.{number_format}}'
    ws.merge_cells(f'B{current_row + 3}:F{current_row + 3}')
    ws.cell(row=current_row + 3, column=1).value = '全区范围'
    ws.cell(row=current_row + 3, column=2).value = f'{stats["min"]:.{number_format}} {stats["max"]:.{number_format}}'

    # --- styling, column widths, save ---
    header_font = Font(name='宋体', size=11, bold=True)
    ExcelStyleUtils.set_style(ws, f'A1:F{current_row+3}')
    ExcelStyleUtils.set_style(ws, f'A1:F2', header_font)
    ExcelStyleUtils.auto_adjust_column_width(ws)
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, soil_prop_tif, output_path, target_area_dict,xzqmc, prop_config):
    """Build the grade-distribution Excel report for one soil property.

    Intersects the land-use polygons with the reclassified grade polygons,
    tabulates the intersection against the "行政区划" layer of the workspace,
    then hands the table to ``process_data_for_table1`` and
    ``write_to_excel_table1``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also used in the output file name.
        reclassed_features_path: Polygon features of the reclassified raster.
        dltb_features: Land-use polygon features.
        soil_prop_tif: Raster path forwarded to the Excel writer.
        output_path: Directory that receives the ``.xlsx`` file.
        target_area_dict: Target areas forwarded to the data processing step.
        xzqmc: Administrative region name.
        prop_config: Grading configuration forwarded to both steps.

    Errors are printed with a traceback rather than raised; temporary
    datasets are cleaned up in every case.
    """
    import gc
    import traceback

    temp_out_feature_class = r"in_memory/temp_out_feature_class"
    temp_out_tables_area = r"in_memory/temp_out_tables_area"
    # Register BOTH intermediates; the intersect output used to be left
    # behind in the in_memory workspace.
    # NOTE(review): assumes clean_up_temp_files can delete feature classes
    # as well as tables - confirm against the helper.
    temp_files = [temp_out_feature_class, temp_out_tables_area]
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        # Only these regions are processed per administrative unit.
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")

        # Land-use polygons x reclassified grade polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features, reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Area of every (grade, land use) class inside each administrative zone.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        final_dataframe, stat = process_data_for_table1(
            gdb_path, soil_prop_name, clipped_table_df, target_area_dict, xzqmc, is_by_xzq, prop_config
        )
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config, soil_prop_tif, stat)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# 土壤质地12级分类
trzd_order = ['砂土及壤质砂土', '砂质壤土','壤土','粉(砂)质壤土','砂质黏壤土','黏壤土','粉(砂)质黏壤土','砂质黏土','壤质黏土','粉(砂)质黏土','黏土','重黏土']
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify the direction in which a merged-cell range extends.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col``, or a falsy value when the cell is not merged.

    Returns:
        'both' when the range spans several rows and several columns,
        'row' when it spans several rows only, 'column' when it spans
        several columns only, otherwise None.
    """
    if not merged_range:
        return None
    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col
    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    # A one-cell range is not really a merge.
    return None
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine sample counts and adjusted mapped areas per land use and grade.

    Args:
        gdb_path: Geodatabase that holds the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the grade-code field read from it.
        df_dltb: Table with ``YJDL_EJDL``, ``gridcode`` and ``AREA`` columns
            (column names are upper-cased on a copy; the argument itself is
            left untouched).
        target_areas_df: Target area per ``EJDL`` in a ``面积`` column;
            ``EJDL`` may be a column or the index.

    Returns:
        DataFrame sorted by the ordered categoricals ``YJDL``, ``EJDL`` and
        ``GRIDCODE`` that carries ``样点数``, ``样点数占比``, ``制图面积`` and
        ``面积占比`` next to the intermediate area columns.
    """
    print("开始处理数据...")
    null_tokens = ['<Null>', 'None', '', '<空>']

    def clean_df(df, columns):
        # Normalise the key columns to stripped text. Genuinely missing
        # values stay missing: a plain astype(str) would turn NaN into the
        # text 'nan', which dropna below cannot see.
        for col in columns:
            raw = df[col]
            df[col] = raw.astype(str).str.strip().where(raw.notna())
        df.replace(null_tokens, np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. sample points: count per land use and grade code ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    # Parse the codes tolerantly ('3', '3.0'); unparsable text is discarded
    # instead of aborting the whole report in astype(int).
    grade_codes = pd.to_numeric(df_samples[field_name], errors='coerce')
    has_code = grade_codes.notna()
    df_samples = df_samples[has_code].copy()
    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    df_samples["GRIDCODE"] = grade_codes[has_code].astype(int)
    df_sample_means = df_samples.groupby(['YJDL', 'EJDL', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. mapped area per land use and grade code ==
    df_dltb = df_dltb.copy()  # never modify the caller's frame
    land_parts = df_dltb['YJDL_EJDL'].str.split('_')
    df_dltb["YJDL"] = land_parts.str[0]
    # Codes without '_' give NaN here and are dropped by clean_df, where
    # the former x.split('_')[1] raised IndexError.
    df_dltb["EJDL"] = land_parts.str[1]
    df_dltb.columns = df_dltb.columns.str.upper()
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_map_data = df_dltb.groupby(["YJDL", "EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu

    # Target area table: trimmed column names, EJDL as a column, numeric area.
    target_areas_df = target_areas_df.copy()
    target_areas_df.columns = target_areas_df.columns.str.strip()
    if 'EJDL' not in target_areas_df.columns:
        target_areas_df = target_areas_df.reset_index()
        target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
    target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)

    # Adjustment factor per EJDL = target area / mapped area.
    ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
    ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ejdl_adj = pd.merge(ejdl_original_sum, target_areas_df, on='EJDL', how='left')
    ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a land use -> keep the mapped area (factor 1).
    ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
    # replace(0, 1) avoids a division by zero.
    ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
    ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1)

    # Apply the factor to every grade row and recompute the area share.
    df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area

    # == c. merge both sides on the union of their keys ==
    print("--> 步骤3: 合并数据...")
    key_cols = ['YJDL', 'EJDL', 'GRIDCODE']
    df_skeleton = pd.concat([
        df_sample_means[key_cols],
        df_map_data[key_cols]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=key_cols, how='left')
    df_final = pd.merge(df_final, df_map_data, on=key_cols, how='left')

    # Fixed report order; EJDL values outside the preset list go last.
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(key_cols, inplace=True)
    print("数据处理流程完成!")
    return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config):
    """Write the land-use / texture-class statistics to a styled Excel sheet.

    Rows are grouped by ``YJDL`` (column A) and ``EJDL`` (column B); every
    ``YJDL`` group ends with a '合计' row and the sheet ends with a
    '全区汇总' row. Nothing is written when ``df`` is empty.

    Args:
        df: Table with ``YJDL``, ``EJDL``, ``GRIDCODE``, ``样点数``,
            ``样点数占比``, ``制图面积`` and ``面积占比`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Mapping whose ``标准等级`` entry maps a class name to its
            code; it is inverted to label each row.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- styles ---
    bold_font = Font(name='等线', size=11, bold=True)
    body_font = Font(name='等线', size=11)
    centered = Alignment(horizontal='center', vertical='center', wrap_text=True)
    boxed = Border(left=Side(style='thin'), right=Side(style='thin'),
                   top=Side(style='thin'), bottom=Side(style='thin'))

    def paint(cell_range, font, alignment=None, border=None):
        # Apply the given style parts to every cell of the range.
        for cells in ws[cell_range]:
            for cell in cells:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def or_dash(number, render):
        # NaN becomes '-', anything else goes through ``render``.
        return '-' if np.isnan(number) else render(number)

    def as_whole(number):
        return round(number, 0)

    def as_percent(number):
        return round(number * 100, 2)

    def write_totals(row, frame):
        # Columns D-G: summed counts, shares and areas of ``frame``.
        ws.cell(row=row, column=4).value = or_dash(frame['样点数'].sum(), as_whole)
        ws.cell(row=row, column=5).value = or_dash(frame['样点数占比'].sum(), as_percent)
        ws.cell(row=row, column=6).value = or_dash(frame['制图面积'].sum(), as_whole)
        ws.cell(row=row, column=7).value = or_dash(frame['面积占比'].sum(), as_percent)

    # --- header ---
    for merged, anchor, title in (('A1:B1', 'A1', '土地利用类型'),
                                  ('C1:E1', 'C1', '样点统计'),
                                  ('F1:G1', 'F1', '制图统计')):
        ws.merge_cells(merged)
        ws[anchor] = title
    second_row = ('一级', '二级', '质地类型', '数量/个', '占比%', '面积/亩', '占比%')
    for letter, title in zip('ABCDEFG', second_row):
        ws[f'{letter}2'] = title

    # code -> class name
    name_by_code = {code: name for name, code in prop_config['标准等级'].items()}

    # --- data rows ---
    current_row = 3
    table = df.copy()
    for yl, yl_rows in table.groupby('YJDL', sort=False, observed=False):
        if yl_rows.empty:
            continue
        print(f"正在写入一级地类: {yl}...")
        yl_first = current_row
        for ej, ej_rows in yl_rows.groupby('EJDL', sort=False, observed=False):
            if ej_rows.empty:
                continue
            print(f"正在写入二级地类: {ej}...")
            ej_first = current_row
            for _, record in ej_rows.iterrows():
                ws.cell(row=current_row, column=3).value = name_by_code.get(str(record['GRIDCODE']), '-')
                ws.cell(row=current_row, column=4).value = or_dash(record['样点数'], lambda v: v)
                ws.cell(row=current_row, column=5).value = or_dash(record['样点数占比'], as_percent)
                ws.cell(row=current_row, column=6).value = or_dash(record['制图面积'], as_whole)
                ws.cell(row=current_row, column=7).value = or_dash(record['面积占比'], as_percent)
                current_row += 1
            # One merged EJDL label per group.
            if ej_first <= current_row:
                ws.merge_cells(start_row=ej_first, start_column=2, end_row=current_row - 1, end_column=2)
                ws.cell(row=ej_first, column=2).value = ej
        # Subtotal row of this YJDL group.
        ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
        ws.cell(row=current_row, column=2).value = '合计'
        write_totals(current_row, yl_rows)
        # The YJDL label also covers the subtotal row.
        if yl_first <= current_row:
            ws.merge_cells(start_row=yl_first, start_column=1, end_row=current_row, end_column=1)
            ws.cell(row=yl_first, column=1).value = yl
        current_row += 1

    # --- grand total row ---
    ws.cell(row=current_row, column=1).value = '全区汇总'
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    write_totals(current_row, table)

    # --- styling ---
    last_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        paint(f'A1:{last_letter}{current_row}', body_font, centered, boxed)
        paint(f'A1:{last_letter}2', bold_font)

    # --- column widths: CJK characters count 1.7, others 1 ---
    print("正在自动调整列宽...")
    widths = {}
    for cells in ws.rows:
        for cell in cells:
            if not cell.value:
                continue
            owner = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            if get_merge_type(owner) == 'column':
                # Horizontally merged titles must not widen one column.
                continue
            text = str(cell.value)
            needed = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            widths[cell.column] = max(widths.get(cell.column, 0), needed)
    for col, value in widths.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    wb.save(output_path)
    print("Excel 报告生成成功!")
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
# print(target_areas_df)
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path,prop_config)
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Build the land-use / texture-class Excel report for one soil property.

    Tabulates the area of every grade (``gridcode``) inside each land-use
    class (``YJDL_EJDL``), then hands the table to
    ``process_data_for_table2`` and ``write_to_excel_table2``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also used in the output file name.
        dltb_features: Land-use polygon features (zones).
        reclassed_feature: Reclassified grade polygon features (classes).
        output_path: Directory that receives the ``.xlsx`` file.
        target_areas_df: Target areas forwarded to the data processing step.
        prop_config: Grading configuration forwarded to the Excel writer.

    Errors are printed with a traceback rather than raised; the temporary
    table is cleaned up in every case.
    """
    import gc
    import traceback

    out_table_mean = r"in_memory/out_table_mean"
    temp_files = [out_table_mean]
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        # Always rebuild the table. Skipping the tool when a table of that
        # name already exists would silently reuse a leftover from an
        # earlier run whose cleanup failed.
        arcpy.analysis.TabulateIntersection(
            dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS"
        )
        dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"]))
        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path)

View File

@@ -0,0 +1,336 @@
# -*- coding: utf-8 -*-
import os
import re
from matplotlib.artist import get
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# 土壤质地5级分类
trzd_order = ['黏壤质','黏质','壤质','砂壤质','砂质']
# --- 2. 辅助函数 ---
def get_prop_level(prop_level):
    """Map a texture grade code or grade name to the texture class name.

    Args:
        prop_level: Grade code (number or numeric text, 1-5) or one of the
            five class names.

    Returns:
        The class name, or "-" for missing values, 0 and anything that is
        neither a known code nor a known name.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    name_by_code = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}
    # Names are checked first: calling int() on a name such as "砂质" used
    # to raise ValueError, so the name branches could never be reached.
    if isinstance(prop_level, str) and prop_level in name_by_code.values():
        return prop_level
    try:
        if isinstance(prop_level, str):
            code = int(float(prop_level))  # accepts "3" as well as "3.0"
        else:
            code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return name_by_code.get(code, "-")
# 判断单元格类型
def get_merge_type(merged_range):
    """Tell whether a merged range spans rows, columns, both or neither.

    Args:
        merged_range: Object with ``min_row``, ``max_row``, ``min_col`` and
            ``max_col`` attributes, or a falsy value for an unmerged cell.

    Returns:
        'both', 'row', 'column', or None when the range covers one cell or
        no range was given.
    """
    if not merged_range:
        return None
    multi_row = merged_range.max_row > merged_range.min_row
    multi_col = merged_range.max_col > merged_range.min_col
    # (spans rows, spans columns) -> label; a one-cell range has no entry.
    labels = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    return labels.get((multi_row, multi_col))
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine sample counts and adjusted mapped areas per land use and texture class.

    Args:
        gdb_path: Geodatabase that holds the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the class field read from it.
        df_dltb: Table with ``YJDL_EJDL``, ``gridcode`` and ``AREA`` columns
            (column names are upper-cased on a copy; the argument itself is
            left untouched). Its grade codes are translated with
            ``get_prop_level``.
        target_areas_df: Target area per ``EJDL`` in a ``面积`` column;
            ``EJDL`` may be a column or the index.

    Returns:
        DataFrame sorted by the ordered categoricals ``YJDL``, ``EJDL`` and
        ``GRIDCODE`` that carries ``样点数``, ``样点数占比``, ``制图面积`` and
        ``面积占比`` next to the intermediate area columns.
    """
    print("开始处理数据...")
    null_tokens = ['<Null>', 'None', '', '<空>']

    def clean_df(df, columns):
        # Normalise the key columns to stripped text. Genuinely missing
        # values stay missing: a plain astype(str) would turn NaN into the
        # text 'nan', which dropna below cannot see.
        for col in columns:
            raw = df[col]
            df[col] = raw.astype(str).str.strip().where(raw.notna())
        df.replace(null_tokens, np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. sample points: count per land use and class ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    # The sample value is used as the class key as-is.
    # NOTE(review): the mapped side below is keyed by class NAME, so the
    # sample field presumably stores names too - confirm.
    df_samples["GRIDCODE"] = df_samples[field_name]
    df_sample_means = df_samples.groupby(['YJDL', 'EJDL', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. mapped area per land use and class ==
    df_dltb = df_dltb.copy()  # never modify the caller's frame
    land_parts = df_dltb['YJDL_EJDL'].str.split('_')
    df_dltb["YJDL"] = land_parts.str[0]
    # Codes without '_' give NaN here and are dropped by clean_df, where
    # the former x.split('_')[1] raised IndexError.
    df_dltb["EJDL"] = land_parts.str[1]
    df_dltb.columns = df_dltb.columns.str.upper()
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_dltb['GRIDCODE'] = df_dltb['GRIDCODE'].apply(get_prop_level)
    df_map_data = df_dltb.groupby(["YJDL", "EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu

    # Target area table: trimmed column names, EJDL as a column, numeric area.
    target_areas_df = target_areas_df.copy()
    target_areas_df.columns = target_areas_df.columns.str.strip()
    if 'EJDL' not in target_areas_df.columns:
        target_areas_df = target_areas_df.reset_index()
        target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
    target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)

    # Adjustment factor per EJDL = target area / mapped area.
    ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
    ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ejdl_adj = pd.merge(ejdl_original_sum, target_areas_df, on='EJDL', how='left')
    ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a land use -> keep the mapped area (factor 1).
    ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
    # replace(0, 1) avoids a division by zero.
    ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
    ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1)

    # Apply the factor to every class row and recompute the area share.
    df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area

    # == c. merge both sides on the union of their keys ==
    print("--> 步骤3: 合并数据...")
    key_cols = ['YJDL', 'EJDL', 'GRIDCODE']
    df_skeleton = pd.concat([
        df_sample_means[key_cols],
        df_map_data[key_cols]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=key_cols, how='left')
    df_final = pd.merge(df_final, df_map_data, on=key_cols, how='left')

    # Fixed report order; EJDL values outside the preset list go last.
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(key_cols, inplace=True)
    print("数据处理流程完成!")
    return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config):
    """Write the land-use / texture-class statistics to a styled Excel sheet.

    Rows are grouped by ``YJDL`` (column A) and ``EJDL`` (column B); every
    ``YJDL`` group ends with a '合计' row and the sheet ends with a
    '全区汇总' row. Nothing is written when ``df`` is empty.

    Args:
        df: Table with ``YJDL``, ``EJDL``, ``GRIDCODE``, ``样点数``,
            ``样点数占比``, ``制图面积`` and ``面积占比`` columns; ``GRIDCODE``
            is written to the sheet as text.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Not read by this variant (the class label comes
            straight from ``GRIDCODE``); kept for call compatibility.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- a. styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        # Apply the given style parts to every cell of the range.
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def shown(value, digits=None, scale=1):
        # '-' for NaN; otherwise the value, optionally scaled and rounded.
        if np.isnan(value):
            return '-'
        return value if digits is None else round(value * scale, digits)

    def write_summary(row, frame):
        # Columns D-G with the totals of ``frame`` (each sum computed once).
        ws.cell(row=row, column=4).value = shown(frame['样点数'].sum(), 0)
        ws.cell(row=row, column=5).value = shown(frame['样点数占比'].sum(), 2, 100)
        ws.cell(row=row, column=6).value = shown(frame['制图面积'].sum(), 0)
        ws.cell(row=row, column=7).value = shown(frame['面积占比'].sum(), 2, 100)

    # --- b. header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'
    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- c. data rows ---
    current_row = 3
    for yl, group_yl_df in df.groupby('YJDL', sort=False, observed=False):
        if group_yl_df.empty:
            continue
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        for ej, group_ej_df in group_yl_df.groupby('EJDL', sort=False, observed=False):
            if group_ej_df.empty:
                continue
            print(f"正在写入二级地类: {ej}...")
            ej_start_row = current_row
            for _, row_data in group_ej_df.iterrows():
                ws.cell(row=current_row, column=3).value = str(row_data['GRIDCODE'])
                ws.cell(row=current_row, column=4).value = shown(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = shown(row_data['样点数占比'], 2, 100)
                ws.cell(row=current_row, column=6).value = shown(row_data['制图面积'], 0)
                ws.cell(row=current_row, column=7).value = shown(row_data['面积占比'], 2, 100)
                current_row += 1
            # One merged EJDL label per (non-empty) group.
            ws.merge_cells(start_row=ej_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ej_start_row, column=2).value = ej
        # Subtotal row of this YJDL group.
        ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
        ws.cell(row=current_row, column=2).value = '合计'
        write_summary(current_row, group_yl_df)
        # The YJDL label also covers the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- grand total row ---
    ws.cell(row=current_row, column=1).value = '全区汇总'
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    write_summary(current_row, df)

    # --- d. styling ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    # Column widths: CJK characters count 1.7, other characters 1.
    print("正在自动调整列宽...")
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next(
                (cell_range for cell_range in ws.merged_cells.ranges if cell.coordinate in cell_range),
                None,
            )
            if get_merge_type(merged_range) == 'column':
                # Horizontally merged titles must not widen one column.
                continue
            text = str(cell.value)
            cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
# print(target_areas_df)
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path,prop_config)
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Build the land-use / texture-class Excel report for one soil property.

    Tabulates the area of every grade (``gridcode``) inside each land-use
    class (``YJDL_EJDL``), then hands the table to
    ``process_data_for_table2`` and ``write_to_excel_table2``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also used in the output file name.
        dltb_features: Land-use polygon features (zones).
        reclassed_feature: Reclassified grade polygon features (classes).
        output_path: Directory that receives the ``.xlsx`` file.
        target_areas_df: Target areas forwarded to the data processing step.
        prop_config: Grading configuration forwarded to the Excel writer.

    Errors are printed with a traceback rather than raised; the temporary
    table is cleaned up in every case.
    """
    import gc
    import traceback

    out_table_mean = r"in_memory/out_table_mean"
    temp_files = [out_table_mean]
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        # Always rebuild the table. Skipping the tool when a table of that
        # name already exists would silently reuse a leftover from an
        # earlier run whose cleanup failed.
        arcpy.analysis.TabulateIntersection(
            dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS"
        )
        dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"]))
        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path)

View File

@@ -0,0 +1,328 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine per land-use sample statistics with the mapped statistics.

    Args:
        gdb_path: Geodatabase that holds the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the value field read from it.
        df_dltb: Mapped statistics with a ``YJDL_EJDL`` column; ``MEAN`` and
            ``COUNT`` columns, when present, are renamed to ``制图均值`` and
            ``制图样点数``. The argument itself is left untouched.
        target_areas_df: Table with an ``EJDL`` column, left-joined as-is.

    Returns:
        DataFrame sorted by the ordered categoricals ``YJDL`` and ``EJDL``
        with the sample ``count``/``max``/``min``/``mean`` columns plus the
        columns of ``df_dltb`` and ``target_areas_df``.
    """
    print("开始处理数据...")
    null_tokens = ['<Null>', 'None', '', '<空>']

    def clean_df(df, columns):
        # Normalise the key columns to stripped text. Genuinely missing
        # values stay missing: a plain astype(str) would turn NaN into the
        # text 'nan', which dropna below cannot see.
        for col in columns:
            raw = df[col]
            df[col] = raw.astype(str).str.strip().where(raw.notna())
        df.replace(null_tokens, np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. sample points: count / max / min / mean per land use ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    df_samples[field_name] = df_samples[field_name].astype(float)
    df_sample_means = df_samples.groupby(['YJDL', 'EJDL'])[field_name].agg(['count', 'max', 'min', 'mean']).reset_index()

    # == b. mapped statistics per land use ==
    df_dltb = df_dltb.copy()  # never modify the caller's frame
    land_parts = df_dltb['YJDL_EJDL'].str.split('_')
    df_dltb["YJDL"] = land_parts.str[0]
    # Codes without '_' give NaN here and are dropped by clean_df, where
    # the former x.split('_')[1] raised IndexError.
    df_dltb["EJDL"] = land_parts.str[1]
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_dltb.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)

    # == c. merge everything on the union of the land-use keys ==
    print("--> 步骤3: 合并数据...")
    key_cols = ['YJDL', 'EJDL']
    df_skeleton = pd.concat([
        df_sample_means[key_cols],
        df_dltb[key_cols]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=key_cols, how='left')
    df_final = pd.merge(df_final, df_dltb, on=key_cols, how='left')
    df_final = pd.merge(df_final, target_areas_df, on=['EJDL'], how='left')

    # Fixed report order; EJDL values outside the preset list go last.
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final.sort_values(key_cols, inplace=True)
    print("数据处理流程完成!")
    return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config:dict, soil_prop_name: str = ''):
    """Write the land-use-type statistics table to a formatted workbook.

    Args:
        df: Frame with the columns ``YJDL``, ``EJDL``, ``mean``, ``min``,
            ``max``, ``count``, ``制图均值``, ``制图样点数`` and ``面积``.
            Nothing is written when it is empty.
        output_path: Destination ``.xlsx`` path.
        prop_config: Mapping read for ``'项目分级'`` (first line = property
            name, which selects the number of decimals) and ``'分级标准'``
            (second line = unit shown in the header).
        soil_prop_name: Property code; for the codes in ``filtered_props`` the
            whole-area row is computed from 耕地 and 园地 only.

    Layout: two header rows, one row per EJDL, a '合计' row per YJDL (a YJDL
    whose last row is 林地/草地/其他 gets its A:B cells merged instead), and a
    final '全区' row.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- Number format (decimals) derived from the property name ---
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'

    # --- Unit: second line of '分级标准'; empty when there is no second line
    # (previously a single-line value raised IndexError) ---
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # Separator between the two ends of a range.  Without one, "1.2" and
    # "3.4" were rendered as the unreadable "1.23.4".
    range_sep = '~'

    def fmt_value(value, spec=prop_name):
        # Fixed-point text, or "-" for a missing value (never "nan").
        if pd.isna(value):
            return "-"
        return f"{value:.{spec}}"

    def fmt_range(low, high):
        # "low~high", or "-" when either end is missing.
        if pd.isna(low) or pd.isna(high):
            return "-"
        return f"{low:.{prop_name}}{range_sep}{high:.{prop_name}}"

    def write_row(row, values):
        # values maps column index -> cell value.
        for column, value in values.items():
            ws.cell(row=row, column=column).value = value

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1'); ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1'); ws['F1'] = '制图统计'
    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '范围/' + prop_unit
    ws['E2'] = '数量/个'
    ws['F2'] = '均值/' + prop_unit
    ws['G2'] = '面积/亩'

    # --- Data rows ---
    current_row = 3
    df_to_write = df.copy()
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['EJDL']
            # Sample statistics.
            sample_mean = row_data.get('mean')
            if pd.notna(sample_mean):
                write_row(current_row, {
                    3: fmt_value(sample_mean),
                    4: fmt_range(row_data.get('min'), row_data.get('max')),
                    5: row_data.get('count', '-'),
                })
            else:
                write_row(current_row, {3: "-", 4: "-", 5: "-"})
            # Mapped statistics.
            map_mean = row_data.get('制图均值')
            if pd.notna(map_mean):
                write_row(current_row, {
                    6: fmt_value(map_mean),
                    7: fmt_value(row_data.get('面积'), '0f'),
                })
            else:
                write_row(current_row, {6: "-", 7: "-"})
            current_row += 1

        # A YJDL whose last written EJDL is one of these labels gets no '合计'
        # row: its A:B cells are merged and carry the YJDL name.
        if ws.cell(row=current_row-1, column=2).value in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            if soil_prop_name in filtered_props:
                write_row(yl_start_row, {6: "-", 7: "-"})
            continue

        # --- '合计' row: sample side (count-weighted mean of the means) ---
        ws.cell(row=current_row, column=2).value = '合计'
        total_count = group_yl_df['count'].sum()
        sample_products = group_yl_df['mean'] * group_yl_df['count']
        if not sample_products.empty and total_count != 0:
            total_sample_mean = sample_products.sum() / total_count
        else:
            total_sample_mean = None
        if pd.notna(total_sample_mean):
            write_row(current_row, {
                3: fmt_value(total_sample_mean),
                4: fmt_range(group_yl_df['min'].min(), group_yl_df['max'].max()),
                5: f"{total_count:.0f}",
            })
        else:
            write_row(current_row, {3: "-", 4: "-", 5: "-"})

        # --- '合计' row: mapped side (mean weighted by 制图样点数) ---
        map_weighted_sum = 0
        map_weight = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            # Only rows with both a mean and a positive weight take part.
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                map_weighted_sum += mean_val * count_val
                map_weight += count_val
        weighted_avg = (map_weighted_sum / map_weight) if map_weight > 0 else 0
        total_area = group_yl_df['面积'].sum()
        if weighted_avg > 0:
            write_row(current_row, {6: fmt_value(weighted_avg), 7: f"{total_area:.0f}"})
        else:
            write_row(current_row, {6: "-", 7: "-"})

        # Merge the YJDL cell over the group's rows, '合计' row included.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- '全区' (whole-area) row ---
    if soil_prop_name in filtered_props:
        df_for_total = df_to_write[df_to_write['YJDL'].isin(['耕地', '园地'])].copy()
        print(f"全区统计过滤仅基于耕地和园地YJDL in ['耕地', '园地']")
    else:
        df_for_total = df_to_write.copy()

    total_counts = df_for_total['count'].sum()
    if total_counts > 0:
        total_mean = (df_for_total['mean'] * df_for_total['count']).sum() / total_counts
    else:
        total_mean = None

    if not df_for_total.empty:
        total_range = fmt_range(df_for_total['min'].min(), df_for_total['max'].max())
        total_areas = df_for_total['面积'].sum()
        # Area-weighted mapped mean.  The denominator only counts area that
        # has a mapped mean; counting the area of rows without one (as before)
        # pulled the result towards zero.
        has_map = df_for_total['制图均值'].notna() & df_for_total['面积'].notna()
        mapped_rows = df_for_total[has_map]
        mapped_area = mapped_rows['面积'].sum()
        if mapped_area > 0:
            total_zhitu_mean = (mapped_rows['制图均值'] * mapped_rows['面积']).sum() / mapped_area
        else:
            total_zhitu_mean = None
    else:
        total_range = "-"
        total_zhitu_mean = None
        total_areas = 0

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    write_row(current_row, {
        3: fmt_value(total_mean),
        4: total_range,
        5: f"{total_counts:.0f}" if total_counts > 0 else "-",
        6: fmt_value(total_zhitu_mean),
        7: f"{total_areas:.0f}" if total_areas > 0 else "-",
    })

    # --- Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        ExcelStyleUtils.set_style(ws,f'A1:{max_col_letter}{current_row}')
        ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, dltb_features, soil_prop_tif, output_path,target_areas_df, prop_config):
    """Produce the land-use-type workbook from zonal raster statistics.

    Runs ``arcpy.sa.ZonalStatisticsAsTable`` (statistic ``MEAN``) over
    ``soil_prop_tif`` with ``dltb_features`` as zones keyed by ``YJDL_EJDL``,
    reads ``YJDL_EJDL``/``MEAN``/``COUNT`` into a DataFrame and passes it on to
    ``process_data_for_table2`` and ``write_to_excel_table2``.

    Exceptions are reported on stdout (message plus traceback) and are not
    re-raised.  The scratch table is always handed to
    ``temp_files_processor.clean_up_temp_files`` afterwards.
    """
    import gc
    import traceback

    temp_files = []
    try:
        # Report destination: "<soil_prop_name>土地利用类型土壤.xlsx" in output_path.
        report_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")

        # Geoprocessing environment.
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        zone_table = r"in_memory/out_table_mean"
        temp_files.append(zone_table)
        table_missing = not arcpy.Exists(zone_table)
        if table_missing:
            # Mean raster value per YJDL_EJDL zone.
            arcpy.sa.ZonalStatisticsAsTable(
                dltb_features, "YJDL_EJDL", soil_prop_tif, zone_table, "DATA", "MEAN")

        records = arcpy.da.TableToNumPyArray(zone_table, ["YJDL_EJDL", "MEAN", "COUNT"])
        dltb_df = pd.DataFrame(records)

        # Aggregate, then render the workbook.
        report_df = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(report_df, report_path, prop_config, soil_prop_name)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,446 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged-cell range by the direction(s) it spans.

    Returns 'both' when the range covers several rows and several columns,
    'row' when it covers several rows only, 'column' when it covers several
    columns only, and None for a falsy argument or a single-cell range.
    """
    if not merged_range:
        return None
    first_row, last_row = merged_range.min_row, merged_range.max_row
    first_col, last_col = merged_range.min_col, merged_range.max_col
    spans_rows = last_row > first_row
    spans_cols = last_col > first_col
    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    # Single cell: not really a merge.
    return None
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Build the per-soil-type (YL / TS / grade) sample and area statistics.

    Args:
        soil_prop_name: Name of the sample column holding the integer grade.
        df_trlx_sample: Sample points with ``YL``, ``TS`` and the grade column.
        df_trlx_zhitu: Tabulated intersection with ``YL_TS`` ("<YL>_<TS>"),
            ``gridcode`` and ``AREA`` columns.
        df_trlx: Soil-type polygons with ``YL``, ``TS`` and ``Shape@Area``.
        target_areas_df: Frame whose ``面积`` column sums to the area total the
            polygons are scaled to.

    The three input frames are copied, not modified.

    Returns:
        One row per (YL, TS, GRIDCODE) with ``样点数``, ``样点数占比``,
        ``制图面积`` (adjusted) and ``面积占比``, sorted by
        ``yl_order`` / ``ts_order`` / GRIDCODE.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        # Normalise the key columns to stripped strings, map the usual textual
        # null placeholders to NaN (anywhere in the frame) and drop rows whose
        # key columns are missing.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count per (YL, TS, grade) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample.copy(), sample_fields)
    df_samples["GRIDCODE"] = df_samples[field_name].astype(int)
    # Parent rock / parent material derived from TS.  They do not feed the
    # returned frame; they are only kept for the disabled parent-material table.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)
    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped area per (YL, TS, grade) ==
    df_trlx_zhitu = df_trlx_zhitu.copy()
    # A key without "_" (for instance the 'None' placeholder of a null zone)
    # used to raise IndexError on ``split('_')[1]``.  ``.str[1]`` yields NaN
    # instead, and such rows are dropped before clean_df could turn the NaN
    # into the literal text 'nan'.
    key_parts = df_trlx_zhitu['YL_TS'].astype(str).str.split('_')
    df_trlx_zhitu["YL"] = key_parts.str[0]
    df_trlx_zhitu["TS"] = key_parts.str[1]
    df_trlx_zhitu.dropna(subset=['TS'], inplace=True)
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, ['YL', 'TS'])
    df_map_data = df_trlx_zhitu.groupby(["YL","TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # square metres -> mu

    # == c. Target area per TS, scaled so the total equals target_areas_df ==
    df_trlx = clean_df(df_trlx.copy(), ['YL', 'TS'])
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015
    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    adjusted_area_yz = target_areas / original_area
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    # Aggregate by TS alone: the coefficient below is computed and joined per
    # TS, so the target must have exactly one row per TS.  Grouping it by
    # (YL, TS) and joining on TS duplicated map rows whenever a TS name
    # occurred under more than one YL.
    df_trlx_area = df_trlx.groupby('TS')['面积'].sum().reset_index()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # == d. Adjustment coefficient per TS = target area / mapped area ==
    ts_original_sum = df_map_data.groupby('TS')['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ts_adj = pd.merge(ts_original_sum, df_trlx_area, on='TS', how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a TS: keep its mapped area (coefficient 1).
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) avoids dividing by a zero mapped area.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # == e. Apply the coefficient and recompute the shares ==
    df_map_data = pd.merge(df_map_data, ts_adj[['TS', '平差系数']], on='TS', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YL', 'TS'])

    # == f. Merge on a skeleton of all observed keys ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YL', 'TS', 'GRIDCODE']],
        df_map_data[['YL', 'TS', 'GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS', 'GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_data, on=['YL', 'TS', 'GRIDCODE'], how='left')

    # Ordering: configured names first, unknown ones appended in order of appearance.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(['YL', 'TS', 'GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# return df_final, df_sample_mymz
# Write Excel table 3 (statistics by soil type)
def write_to_excel_table3(df, output_path, prop_config:dict):
    """Write the soil-type statistics table to a formatted workbook.

    Args:
        df: Frame with ``YL``, ``TS``, ``GRIDCODE``, ``样点数``, ``样点数占比``,
            ``制图面积`` and ``面积占比`` columns.  Nothing is written when it
            is empty.
        output_path: Destination ``.xlsx`` path.
        prop_config: Mapping whose ``'标准等级'`` entry maps a grade label to
            its grade code; it is inverted to label the GRIDCODE column.

    Layout: two header rows, one row per (YL, TS, GRIDCODE) with the YL and TS
    cells merged over their groups, and a final '全区' totals row.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment: cell.alignment = alignment
                if border: cell.border = border

    def or_dash(value, scale=1, digits=None):
        # NaN -> '-', otherwise value*scale, rounded when digits is given.
        if np.isnan(value):
            return '-'
        return value if digits is None else round(value * scale, digits)

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1'); ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1'); ws['F1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # Grade code -> label.  The lookup key below is str(GRIDCODE), so the
    # codes are normalised to str here as well; a config with integer codes
    # would otherwise never match.
    level_dict = prop_config['标准等级']
    upper_ranges = {str(value): key for key, value in level_dict.items()}

    # --- Data rows ---
    current_row = 3
    df_to_write = df.copy()
    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            if group_ts_df.empty:
                continue
            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row
            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = upper_ranges.get(str(row_data['GRIDCODE']), '-')
                # Sample statistics.
                ws.cell(row=current_row, column=4).value = or_dash(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = or_dash(row_data['样点数占比'], 100, 2)
                # Mapped statistics.
                ws.cell(row=current_row, column=6).value = or_dash(row_data['制图面积'], 1, 0)
                ws.cell(row=current_row, column=7).value = or_dash(row_data['面积占比'], 100, 2)
                current_row += 1
            # Merge the TS cell over its rows.  A one-row group needs no merge;
            # skipping it avoids creating a degenerate single-cell merge range.
            if current_row - 1 > ts_start_row:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row-1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts
        # Merge the YL cell over its rows (same single-row rule).
        if current_row - 1 > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row-1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- '全区' totals row ---
    total_areas = df_to_write['制图面积'].sum()
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum()*100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum()*100, 2)

    # --- Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    cjk_chars = re.compile('([\u4e00-\u9fa5])')
    merged_ranges = list(ws.merged_cells.ranges)
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in merged_ranges if cell.coordinate in rng), None)
                # Text in a horizontally merged cell must not widen one column.
                if get_merge_type(merged_range) == 'column':
                    continue
                text = str(cell.value)
                # Each CJK character counts 1.7, every other character 1.
                cell_len = 0.7 * len(cjk_chars.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """Write the parent-material / parent-rock statistics table to a workbook.

    Args:
        df: Frame with ``母质``, ``母岩``, ``TZ``, ``mean`` and ``count``
            columns.  Nothing is written when it is empty.
        output_path: Destination ``.xlsx`` path.
        prop_config: Mapping read for ``'分级标准'`` (second line = unit shown
            in the header).

    Rows are grouped by (母质, 母岩): the TZ names are joined with a comma, the
    counts are summed and the mean is weighted by count.  Groups whose 母质 is
    '未知' are skipped.  Means are rounded to one decimal.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Unit: second line of '分级标准'; empty when there is no second line
    # (previously a single-line value raised IndexError).
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment: cell.alignment = alignment
                if border: cell.border = border

    # --- Header ---
    headers = ['母岩母质','', '土种类型', '样点统计', '']
    ws.append(headers)
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')
    ws.merge_cells('C1:C2')
    ws.merge_cells('D1:E1')
    current_row = 3

    # --- Aggregate per (母质, 母岩) ---
    work = df.copy()
    # Weight each row's mean by its sample count so the group mean is the mean
    # of the samples, not an unweighted mean of means.
    work['_weighted'] = work['mean'] * work['count']
    grouped = work.groupby(['母质', '母岩']).agg({
        # Comma-joined list; the previous '' separator ran the names together.
        'TZ': lambda x: ','.join(x),
        '_weighted': 'sum',
        'count': 'sum'
    }).reset_index()
    # where(...) turns a zero count into NaN so the division yields NaN.
    grouped['mean'] = grouped['_weighted'] / grouped['count'].where(grouped['count'] > 0)

    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        parent_material_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]
        first_row_in_group = True
        for _, row_data in material_group.iterrows():
            # The parent-material name is shown on the first row of its group only.
            ws.cell(row=current_row, column=1, value=parent_material if first_row_in_group else '')
            first_row_in_group = False
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            group_mean = row_data['mean']
            ws.cell(row=current_row, column=4, value=round(group_mean, 1) if pd.notna(group_mean) else '-')
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        # Merge the parent-material cell over its rows; a one-row group needs
        # no merge.
        if current_row - 1 > parent_material_row:
            ws.merge_cells(start_row=parent_material_row, start_column=1, end_row=current_row - 1, end_column=1)

    # --- '全区' row ---
    # NOTE(review): only the label is written; the mean/count cells of this
    # row stay empty, as before -- confirm which totals belong here.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    for column_letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[column_letter].width = width

    # --- Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Produce the soil-type workbook for one soil property.

    Steps: spatially join the sample feature class ``<gdb_path>/<soil_prop_name>``
    to ``trlx_features`` (keeping the property field plus YL/TS/TZ), tabulate
    the intersection of ``trlx_features`` (zone field ``YL_TS``) with
    ``reclassed_feature`` (class field ``gridcode``), read the polygon areas,
    then call ``process_data_for_table3`` and ``write_to_excel_table3``.

    Exceptions are reported on stdout (message plus traceback) and are not
    re-raised.  The two ``in_memory`` datasets are always handed to
    ``temp_files_processor.clean_up_temp_files`` afterwards.
    """
    import gc
    import traceback

    temp_files = []
    try:
        # Report destination and sample feature class.
        report_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)

        # Geoprocessing environment.
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # Field map for the spatial join: which fields to carry from each input.
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for join_features, kept_fields in fields_to_keep.items():
            for field_name in kept_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # A field that cannot be mapped is reported and skipped.
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

        # Samples joined to the soil-type polygon they intersect.
        arcpy.analysis.SpatialJoin(
            soil_prop_features, trlx_features, temp_out_features,
            "JOIN_ONE_TO_ONE", "KEEP_ALL",field_mappings, "INTERSECT")
        # Area of every gridcode class inside every YL_TS zone.
        arcpy.analysis.TabulateIntersection(
            trlx_features, "YL_TS", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS")

        trlx_zhitu_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"]))
        trlx_sample_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))
        # Polygon areas of the soil-type features.
        trlx_area_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"]))

        # Aggregate, then render the workbook.
        report_df = process_data_for_table3(soil_prop_name,trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)
        write_to_excel_table3(report_df, report_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,465 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
def get_prop_level(prop_level):
    """Map a texture grade code, or a texture label, to its label.

    Codes: 5 -> "砂质", 4 -> "砂壤质", 3 -> "壤质", 1 -> "黏壤质", 2 -> "黏质".
    A value that already is one of these labels is returned unchanged.
    Missing values, 0, unknown codes and text that is neither a label nor an
    integer give "-".

    The label check runs before the numeric conversion: the previous version
    evaluated ``int(prop_level)`` first, so passing a label raised ValueError.
    """
    code_to_label = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}
    if pd.isna(prop_level):
        return "-"
    if isinstance(prop_level, str):
        text = prop_level.strip()
        if text in code_to_label.values():
            return text
    try:
        code = int(prop_level)
    except (TypeError, ValueError):
        # Neither a known label nor something convertible to an integer.
        return "-"
    # 0 and any other unmapped code fall back to "-".
    return code_to_label.get(code, "-")
# 判断单元格类型
def get_merge_type(merged_range):
    """Tell in which direction(s) a merged-cell range extends.

    Returns 'row' for a range spanning several rows in one column, 'column'
    for several columns in one row, 'both' for several rows and columns, and
    None for a falsy argument or a range that covers a single cell.
    """
    if not merged_range:
        return None
    top, bottom = merged_range.min_row, merged_range.max_row
    left, right = merged_range.min_col, merged_range.max_col
    # Keyed by (spans several rows, spans several columns); the single-cell
    # case (False, False) is absent and therefore yields None.
    kinds = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    return kinds.get((bottom > top, right > left))
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Build the per-soil-type (YL / TS / texture class) sample and area statistics.

    Args:
        soil_prop_name: Name of the sample column holding the class value.
        df_trlx_sample: Sample points with ``YL``, ``TS`` and the class column.
        df_trlx_zhitu: Tabulated intersection with ``YL_TS`` ("<YL>_<TS>"),
            ``gridcode`` and ``AREA`` columns; gridcode is translated with
            ``get_prop_level``.
        df_trlx: Soil-type polygons with ``YL``, ``TS`` and ``Shape@Area``.
        target_areas_df: Frame whose ``面积`` column sums to the area total the
            polygons are scaled to.

    The three input frames are copied, not modified.

    Returns:
        One row per (YL, TS, GRIDCODE) with ``样点数``, ``样点数占比``,
        ``制图面积`` (adjusted) and ``面积占比``, sorted by
        ``yl_order`` / ``ts_order`` / GRIDCODE.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        # Normalise the key columns to stripped strings, map the usual textual
        # null placeholders to NaN (anywhere in the frame) and drop rows whose
        # key columns are missing.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count per (YL, TS, class) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample.copy(), sample_fields)
    # The sample value is used as the class key as-is (stripped string).
    df_samples["GRIDCODE"] = df_samples[field_name]
    # Parent rock / parent material derived from TS.  They do not feed the
    # returned frame; they are only kept for the disabled parent-material table.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)
    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped area per (YL, TS, class) ==
    df_trlx_zhitu = df_trlx_zhitu.copy()
    # A key without "_" (for instance the 'None' placeholder of a null zone)
    # used to raise IndexError on ``split('_')[1]``.  ``.str[1]`` yields NaN
    # instead, and such rows are dropped before clean_df could turn the NaN
    # into the literal text 'nan'.
    key_parts = df_trlx_zhitu['YL_TS'].astype(str).str.split('_')
    df_trlx_zhitu["YL"] = key_parts.str[0]
    df_trlx_zhitu["TS"] = key_parts.str[1]
    df_trlx_zhitu.dropna(subset=['TS'], inplace=True)
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, ['YL', 'TS'])
    # Grade code -> texture label, so it can be matched with the sample key.
    df_trlx_zhitu['GRIDCODE'] = df_trlx_zhitu['GRIDCODE'].apply(get_prop_level)
    df_map_data = df_trlx_zhitu.groupby(["YL","TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # square metres -> mu

    # == c. Target area per TS, scaled so the total equals target_areas_df ==
    df_trlx = clean_df(df_trlx.copy(), ['YL', 'TS'])
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015
    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    adjusted_area_yz = target_areas / original_area
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    # Aggregate by TS alone: the coefficient below is computed and joined per
    # TS, so the target must have exactly one row per TS.  Grouping it by
    # (YL, TS) and joining on TS duplicated map rows whenever a TS name
    # occurred under more than one YL.
    df_trlx_area = df_trlx.groupby('TS')['面积'].sum().reset_index()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # == d. Adjustment coefficient per TS = target area / mapped area ==
    ts_original_sum = df_map_data.groupby('TS')['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ts_adj = pd.merge(ts_original_sum, df_trlx_area, on='TS', how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a TS: keep its mapped area (coefficient 1).
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) avoids dividing by a zero mapped area.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # == e. Apply the coefficient and recompute the shares ==
    df_map_data = pd.merge(df_map_data, ts_adj[['TS', '平差系数']], on='TS', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YL', 'TS'])

    # == f. Merge on a skeleton of all observed keys ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YL', 'TS', 'GRIDCODE']],
        df_map_data[['YL', 'TS', 'GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS', 'GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_data, on=['YL', 'TS', 'GRIDCODE'], how='left')

    # Ordering: configured names first, unknown ones appended in order of appearance.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(['YL', 'TS', 'GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# return df_final, df_sample_mymz
# Write Excel table 3 (statistics by soil type)
def write_to_excel_table3(df, output_path, prop_config:dict):
    """Write the soil-type statistics table to a formatted workbook.

    Args:
        df: Frame with ``YL``, ``TS``, ``GRIDCODE`` (written as-is into the
            class column), ``样点数``, ``样点数占比``, ``制图面积`` and
            ``面积占比`` columns.  Nothing is written when it is empty.
        output_path: Destination ``.xlsx`` path.
        prop_config: Kept for interface compatibility; not read here.  The
            former ``prop_config['标准等级']`` lookup fed a dictionary that was
            never used and only added a KeyError risk.

    Layout: two header rows, one row per (YL, TS, GRIDCODE) with the YL and TS
    cells merged over their groups, and a final '全区' totals row.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment: cell.alignment = alignment
                if border: cell.border = border

    def or_dash(value, scale=1, digits=None):
        # NaN -> '-', otherwise value*scale, rounded when digits is given.
        if np.isnan(value):
            return '-'
        return value if digits is None else round(value * scale, digits)

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1'); ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1'); ws['F1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- Data rows ---
    current_row = 3
    df_to_write = df.copy()
    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            if group_ts_df.empty:
                continue
            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row
            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = row_data['GRIDCODE']
                # Sample statistics.
                ws.cell(row=current_row, column=4).value = or_dash(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = or_dash(row_data['样点数占比'], 100, 2)
                # Mapped statistics.
                ws.cell(row=current_row, column=6).value = or_dash(row_data['制图面积'], 1, 0)
                ws.cell(row=current_row, column=7).value = or_dash(row_data['面积占比'], 100, 2)
                current_row += 1
            # Merge the TS cell over its rows.  A one-row group needs no merge;
            # skipping it avoids creating a degenerate single-cell merge range.
            if current_row - 1 > ts_start_row:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row-1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts
        # Merge the YL cell over its rows (same single-row rule).
        if current_row - 1 > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row-1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- '全区' totals row ---
    total_areas = df_to_write['制图面积'].sum()
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum()*100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum()*100, 2)

    # --- Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    cjk_chars = re.compile('([\u4e00-\u9fa5])')
    merged_ranges = list(ws.merged_cells.ranges)
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in merged_ranges if cell.coordinate in rng), None)
                # Text in a horizontally merged cell must not widen one column.
                if get_merge_type(merged_range) == 'column':
                    continue
                text = str(cell.value)
                # Each CJK character counts 1.7, every other character 1.
                cell_len = 0.7 * len(cjk_chars.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """Write the parent-material / parent-rock sample statistics to an Excel workbook.

    Args:
        df: Statistics with the columns '母质', '母岩', 'TZ', 'mean' and
            'count'. Rows sharing the same ('母质', '母岩') pair are
            collapsed into one table row.
        output_path: Path of the workbook to create.
        prop_config: Mapping read with ``.get``; the second line of its
            '分级标准' entry is shown as the unit in the header.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # The unit is read from the second line of the grading-standard text.
    # A missing or single-line entry yields an empty unit instead of an IndexError.
    unit_lines = str(prop_config.get('分级标准', '') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_side = Side(style='thin')
    thin_border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply the font and, when given, the alignment/border to every cell in the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header (two rows, merged below) ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')  # parent material / parent rock
    ws.merge_cells('C1:C2')  # soil species
    ws.merge_cells('D1:E1')  # sample statistics

    # --- c. Data rows ---
    # Collapse the rows of each (parent material, parent rock) pair. Counts are
    # summed, so the mean has to be weighted by count to stay the mean of the
    # underlying samples.
    weighted = df.assign(_weighted=df['mean'] * df['count'])
    grouped = weighted.groupby(['母质', '母岩']).agg({
        'TZ': lambda names: '、'.join(map(str, names)),  # separate the soil-species names
        '_weighted': 'sum',
        'count': 'sum',
    }).reset_index()
    grouped['mean'] = grouped['_weighted'] / grouped['count']

    current_row = 3
    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        material_group = grouped[grouped['母质'] == parent_material]
        group_start_row = current_row
        # The name is written once; the remaining rows of the group are merged into it.
        ws.cell(row=group_start_row, column=1, value=parent_material)
        for _, row_data in material_group.iterrows():
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        # Merging a single cell is a no-op, so only merge multi-row groups.
        if current_row - group_start_row > 1:
            ws.merge_cells(start_row=group_start_row, start_column=1, end_row=current_row - 1, end_column=1)

    # Final "whole area" row (label only in this version).
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)
    ws.column_dimensions["A"].width = 20
    ws.column_dimensions["B"].width = 20
    ws.column_dimensions["C"].width = 30
    ws.column_dimensions["D"].width = 20
    ws.column_dimensions["E"].width = 20

    # --- e. Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Build the per-soil-type Excel report for one soil property.

    Sample points are spatially joined to the soil-type polygons, the
    reclassified feature is tabulated against the soil-type zones, and the
    resulting tables are handed to ``process_data_for_table3`` and
    ``write_to_excel_table3``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace; it also holds the
            sample feature class named ``soil_prop_name``.
        soil_prop_name: Name of the sample feature class and of its value
            field; also used as the prefix of the report file name.
        trlx_features: Soil-type features providing the fields 'YL', 'TS',
            'TZ' and 'YL_TS'.
        reclassed_feature: Features tabulated against the soil-type zones by
            their 'gridcode' field.
        output_path: Directory in which the workbook is created.
        target_areas_df: Passed through to ``process_data_for_table3``.
        prop_config: Passed through to ``write_to_excel_table3``.

    Returns:
        None. Any exception is printed with its traceback instead of being
        propagated; temporary datasets are cleaned up in every case.
    """
    temp_files = []
    try:
        # --- 1. Paths and environment ---
        output_excel_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # --- 2. Spatial join of the sample points onto the soil-type polygons ---
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for source_features, wanted_fields in fields_to_keep.items():
            for field_name in wanted_fields:
                try:
                    mapping = arcpy.FieldMap()
                    mapping.addInputField(source_features, field_name)
                    mapping.mergeRule = "First"  # every joined field keeps its first value
                    field_mappings.addFieldMap(mapping)
                except Exception as e:
                    # A field that cannot be mapped is reported and skipped.
                    print(f"警告: 添加字段 '{field_name}' (来自 '{source_features}') 时出错,将跳过。错误信息: {e}")
        arcpy.analysis.SpatialJoin(
            soil_prop_features, trlx_features, temp_out_features,
            "JOIN_ONE_TO_ONE", "KEEP_ALL", field_mappings, "INTERSECT",
        )

        # --- 3. Tabulate the area of every grade inside each soil-type zone ---
        arcpy.analysis.TabulateIntersection(
            trlx_features, "YL_TS", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS",
        )
        zhitu_records = arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"])
        trlx_zhitu_df = pd.DataFrame(zhitu_records)
        sample_records = arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name])
        trlx_sample_df = pd.DataFrame(sample_records)
        # Areas of the soil-type polygons.
        area_records = arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"])
        trlx_area_df = pd.DataFrame(area_records)

        # --- 4. Build and write table 3 ---
        final_dataframe = process_data_for_table3(
            soil_prop_name, trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df,
        )
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,512 @@
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Combine sample, mapping and area statistics per soil type (YL, TS).

    The input DataFrames are not modified; every step works on copies.

    Args:
        soil_prop_name: Name of the sample value column in ``df_trlx_sample``;
            it also selects which target area is used for the adjustment.
        df_trlx_sample: Sample points with the columns 'YL', 'TS', 'TZ' and
            ``soil_prop_name``.
        df_trlx_zhitu: Zonal statistics with the columns 'YL_TS', 'MEAN' and
            'COUNT'; 'YL_TS' is split at its first underscore.
        df_trlx: Soil-type polygons with the columns 'YL', 'TS' and
            'Shape@Area'.
        target_areas_df: Target areas with the columns 'EJDL' and '面积'; the
            polygon areas are rescaled so that they sum to the target.

    Returns:
        A tuple ``(df_final, df_sample_mymz)``. ``df_final`` holds one row per
        (YL, TS) pair with the sample statistics, the mapped mean/count and
        the adjusted area '面积_亩'. ``df_sample_mymz`` holds the sample
        count/mean/median per ('母质', '母岩', 'TZ').
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Return a copy of ``df`` with ``columns`` stripped and rows lacking any of them dropped."""
        df = df.copy()  # never touch the caller's DataFrame
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample statistics ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    df_samples = clean_df(df_trlx_sample, ['YL', 'TS', field_name])
    df_samples[field_name] = df_samples[field_name].astype(float)
    # Derive the parent rock from the soil genus, then the parent material from the rock.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)
    # Sample statistics per (YL, TS) and per (parent material, parent rock, TZ).
    df_sample_means = df_samples.groupby(['YL', 'TS'])[field_name].agg(['count', 'max', 'min', 'mean', 'median']).reset_index()
    df_sample_mymz = df_samples.groupby(['母质', '母岩', 'TZ'])[field_name].agg(['count', 'mean', 'median']).reset_index()

    # == b. Mapping statistics: split the zone key into YL and TS ==
    # partition() splits at the first underscore only, so a TS that itself
    # contains an underscore stays intact. A key without an underscore gets an
    # empty TS and is dropped by clean_df instead of raising an IndexError.
    yl_ts = df_trlx_zhitu['YL_TS'].astype(str)
    df_trlx_zhitu = clean_df(
        df_trlx_zhitu.assign(
            YL=yl_ts.apply(lambda text: text.partition('_')[0]),
            TS=yl_ts.apply(lambda text: text.partition('_')[2]),
        ),
        ['YL', 'TS'],
    )
    df_trlx_zhitu = df_trlx_zhitu.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'})

    # == c. Polygon areas, converted with the factor 0.0015 into '面积_亩' ==
    df_trlx = clean_df(df_trlx, ['YL', 'TS'])
    df_trlx["面积_亩"] = df_trlx["Shape@Area"] * 0.0015
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    # Pick the target total area used to adjust the polygon areas.
    print(target_areas_df)
    if soil_prop_name == "GZCHD":
        target_areas = target_areas_df[target_areas_df['EJDL'] == '耕地']['面积'].values[0]
    elif soil_prop_name in filtered_props:
        target_areas = target_areas_df[target_areas_df['EJDL'].isin(['耕地', '园地'])]['面积'].sum()
    else:
        target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积_亩'].sum()
    # With no polygon area there is nothing to rescale; skipping the division
    # keeps the areas at 0 instead of turning them into NaN.
    if original_area > 0:
        df_trlx["面积_亩"] = df_trlx["面积_亩"] * (target_areas / original_area)
    df_trlx_area = df_trlx.groupby(['YL', 'TS'])['面积_亩'].sum().reset_index()

    # == d. Merge everything onto the union of (YL, TS) pairs ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YL', 'TS']],
        df_trlx_zhitu[['YL', 'TS']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS'], how='left')
    df_final = pd.merge(df_final, df_trlx_zhitu, on=['YL', 'TS'], how='left')
    df_final = pd.merge(df_final, df_trlx_area, on=['YL', 'TS'], how='left')

    # == e. Sort by the configured YL / TS order; unknown names go last ==
    # dict.fromkeys keeps the first occurrence of each name, so the category
    # lists are unique even if the configured order contains duplicates.
    in_yl_order = list(dict.fromkeys([*yl_order, *df_final['YL'].unique()]))
    in_ts_order = list(dict.fromkeys([*ts_order, *df_final['TS'].unique()]))
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final.sort_values(['YL', 'TS'], inplace=True)
    print("数据处理流程完成!")
    return df_final, df_sample_mymz
# 写入EXCEL 表3
def write_to_excel_table3(df, output_path, prop_config:dict, stats):
    """Write the per-soil-type statistics table to a formatted Excel workbook.

    Args:
        df: One row per (YL, TS) pair with the sample columns 'count', 'max',
            'min', 'mean', 'median' and the mapping columns '制图均值',
            '制图样点数' and '面积_亩'.
        output_path: Path of the workbook to create.
        prop_config: Mapping read with ``.get``. The first line of '项目分级'
            selects the number of decimals; the second line of '分级标准' is
            shown as the unit in the header.
        stats: Mapping with the overall sample 'mean', 'median' and 'count'
            written to the final "全区" row.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # Number format: one decimal for the listed properties, none for the
    # particle-content properties, two otherwise.
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'
    # The unit is read from the second line of the grading-standard text.
    # A missing or single-line entry yields an empty unit instead of an IndexError.
    unit_lines = str(prop_config.get('分级标准', '') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt(value, spec=prop_name):
        """Format a number with ``spec``; missing values are shown as '-'."""
        if pd.isna(value):
            return "-"
        return f"{value:.{spec}}"

    def fmt_range(low, high):
        """Format a 'min~max' range; '-' when either bound is missing."""
        if pd.isna(low) or pd.isna(high):
            return "-"
        return f"{fmt(low)}~{fmt(high)}"

    def write_row(row, values):
        """Write ``values`` into consecutive cells of ``row`` starting at column 3."""
        for offset, value in enumerate(values):
            ws.cell(row=row, column=3 + offset).value = value

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:F1')
    ws['C1'] = '样点统计'
    ws.merge_cells('G1:H1')
    ws['G1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '中位值/' + prop_unit
    ws['E2'] = '范围/' + prop_unit
    ws['F2'] = '数量/个'
    ws['G2'] = '均值/' + prop_unit
    ws['H2'] = '面积/亩'

    # --- c. Data rows ---
    current_row = 3
    for yl, group_yl_df in df.groupby('YL', sort=False, observed=True):
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        # One row per TS of this YL.
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['TS']
            if pd.notna(row_data.get('mean')):
                sample_cells = [
                    fmt(row_data['mean']),
                    fmt(row_data.get('median')),
                    fmt_range(row_data.get('min'), row_data.get('max')),
                    row_data.get('count', '-'),
                ]
            else:
                sample_cells = ["-"] * 4
            if pd.notna(row_data.get('制图均值')):
                map_cells = [fmt(row_data['制图均值']), fmt(row_data.get('面积_亩'), '0f')]
            else:
                map_cells = ["-", "-"]
            write_row(current_row, sample_cells + map_cells)
            current_row += 1

        # A group whose last TS is one of these names gets no subtotal row;
        # its first row shows the YL name across columns A:B instead.
        if group_yl_df['TS'].iloc[-1] in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            continue

        # Subtotal row of the YL group.
        ws.cell(row=current_row, column=2).value = '合计'
        sample_count = group_yl_df['count'].sum()
        if sample_count > 0:
            # Count-weighted mean of the TS means; the "median" is the mean of
            # the TS medians because the raw samples are not available here.
            sample_mean = (group_yl_df['mean'] * group_yl_df['count']).sum() / sample_count
            sample_cells = [
                fmt(sample_mean),
                fmt(group_yl_df['median'].mean()),
                fmt_range(group_yl_df['min'].min(), group_yl_df['max'].max()),
                f"{sample_count:.0f}",
            ]
        else:
            sample_cells = ["-"] * 4

        # Mapped mean of the group, weighted by '制图样点数'. Rows without a
        # mean or without a positive count are left out. "No data" is decided
        # by the total count, so a mean of 0 is still reported.
        map_stats = group_yl_df.reindex(columns=['制图均值', '制图样点数'])
        usable = map_stats['制图均值'].notna() & (map_stats['制图样点数'] > 0)
        cell_count = map_stats.loc[usable, '制图样点数'].sum()
        if cell_count > 0:
            weighted_avg = (map_stats['制图均值'] * map_stats['制图样点数'])[usable].sum() / cell_count
            map_cells = [fmt(weighted_avg), fmt(group_yl_df['面积_亩'].sum(), '0f')]
        else:
            map_cells = ["-", "-"]
        write_row(current_row, sample_cells + map_cells)

        # Merge the YL cell over its TS rows plus the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # Final "whole area" row. The mapped mean is area-weighted over the rows
    # that have both a mapped mean and an area, so rows without a mapped mean
    # no longer inflate the denominator.
    mapped = df['制图均值'].notna() & df['面积_亩'].notna()
    mapped_area = df.loc[mapped, '面积_亩'].sum()
    if mapped_area > 0:
        total_zhitu_mean = (df['制图均值'] * df['面积_亩'])[mapped].sum() / mapped_area
    else:
        total_zhitu_mean = None
    total_areas = df['面积_亩'].sum()
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    write_row(current_row, [
        fmt(stats['mean']),
        fmt(stats['median']),
        fmt_range(df['min'].min(), df['max'].max()),
        fmt(stats['count'], '0f'),
        fmt(total_zhitu_mean),
        fmt(total_areas, '0f'),
    ])

    # --- d. Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config, stats):
    """Write the parent-material / parent-rock sample statistics to an Excel workbook.

    Args:
        df: Statistics with the columns '母质', '母岩', 'TZ', 'mean' and
            'count'. Rows sharing the same ('母质', '母岩') pair are
            collapsed into one table row.
        output_path: Path of the workbook to create.
        prop_config: Mapping read with ``.get``. The first line of '项目分级'
            selects the number of decimals; the second line of '分级标准' is
            shown as the unit in the header.
        stats: Mapping with the overall sample 'mean' and 'count' written to
            the final "全区" row.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Number of decimals: one for the listed properties, none for the
    # particle-content / soil-depth properties, two otherwise.
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            decimals = 1
        elif split_name in fsn_props:
            decimals = 0
        else:
            decimals = 2
    else:
        decimals = 1
    prop_name = f'{decimals}f'
    # The unit is read from the second line of the grading-standard text.
    # A missing or single-line entry yields an empty unit instead of an IndexError.
    unit_lines = str(prop_config.get('分级标准', '') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- b. Header (two rows, merged below) ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')  # parent material / parent rock
    ws.merge_cells('C1:C2')  # soil species
    ws.merge_cells('D1:E1')  # sample statistics

    # --- c. Data rows ---
    # Collapse the rows of each (parent material, parent rock) pair. Counts are
    # summed, so the mean has to be weighted by count to stay the mean of the
    # underlying samples, in line with the overall mean in the "全区" row.
    weighted = df.assign(_weighted=df['mean'] * df['count'])
    grouped = weighted.groupby(['母质', '母岩']).agg({
        'TZ': lambda names: '、'.join(map(str, names)),  # separate the soil-species names
        '_weighted': 'sum',
        'count': 'sum',
    }).reset_index()
    grouped['mean'] = grouped['_weighted'] / grouped['count']

    current_row = 3
    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        material_group = grouped[grouped['母质'] == parent_material]
        group_start_row = current_row
        # The name is written once; the remaining rows of the group are merged into it.
        ws.cell(row=group_start_row, column=1, value=parent_material)
        for _, row_data in material_group.iterrows():
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            # Same precision as the "全区" row instead of a fixed single decimal.
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], decimals))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        # Merging a single cell is a no-op, so only merge multi-row groups.
        if current_row - group_start_row > 1:
            ws.merge_cells(start_row=group_start_row, start_column=1, end_row=current_row - 1, end_column=1)

    # Final "whole area" row with the overall sample statistics.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')
    ws.cell(row=current_row, column=4, value=f"{stats['mean']:.{prop_name}}")
    ws.cell(row=current_row, column=5, value=f"{stats['count']:.0f}")

    # --- d. Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    ws.column_dimensions["A"].width = 20
    ws.column_dimensions["B"].width = 20
    ws.column_dimensions["C"].width = 30
    ws.column_dimensions["D"].width = 20
    ws.column_dimensions["E"].width = 20

    # --- e. Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, soil_prop_tif, output_path,target_areas_df, prop_config, dltb_features):
    """Build the soil-type and parent-rock Excel reports for one soil property.

    Sample points are spatially joined to the soil-type polygons, the property
    raster is summarised per 'YL_TS' zone, and both result tables are written
    with ``write_to_excel_table3`` and ``write_to_excel_table4``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace; it also holds the
            sample feature class named ``soil_prop_name``.
        soil_prop_name: Name of the sample feature class and of its value
            field; also used as the prefix of the report file names.
        trlx_features: Soil-type features providing the fields 'YL', 'TS',
            'TZ' and 'YL_TS'.
        soil_prop_tif: Raster summarised by zonal statistics.
        output_path: Directory in which the workbooks are created.
        target_areas_df: Passed through to ``process_data_for_table3``.
        prop_config: Passed through to the Excel writers.
        dltb_features: Land-use features with a 'DLBM' field. They are only
            used when the zones have to be restricted by 'DLBM' prefix.

    Returns:
        None. Any exception is printed with its traceback instead of being
        propagated; temporary datasets are cleaned up in every case.
    """
    temp_files = []
    try:
        # --- 1. Paths and environment ---
        output_excel_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        output_excel4_path = os.path.join(output_path,f"{soil_prop_name}不同母岩母质土壤属性.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # --- 2. Spatial join of the sample points onto the soil-type polygons ---
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for join_features, field_names in fields_to_keep.items():
            for field_name in field_names:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"  # every joined field keeps its first value
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # A field that cannot be mapped is reported and skipped.
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")
        arcpy.analysis.SpatialJoin(soil_prop_features, trlx_features, temp_out_features, "JOIN_ONE_TO_ONE", "KEEP_ALL",field_mappings, "INTERSECT")

        # --- 3. Choose the zones for the mapped statistics ---
        # Properties in this list are restricted by the 'DLBM' prefixes 01 and
        # 02; "GZCHD" is restricted to prefix 01 only.
        filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
        is_filtered_prop = soil_prop_name in filtered_props
        if soil_prop_name == "GZCHD":
            where_clause = "DLBM LIKE '01%'"
        elif is_filtered_prop:
            where_clause = "DLBM LIKE '01%' OR DLBM LIKE '02%'"
        else:
            where_clause = None

        if where_clause is None:
            zone_features = trlx_features
        else:
            temp_gdtb_trlx = r"in_memory/temp_gdtb_trlx"
            zone_features = r"in_memory/temp_gdtb_trlx_out"
            # Register both intermediates so that the exported subset is
            # cleaned up too, not only the intersect result.
            temp_files.extend([temp_gdtb_trlx, zone_features])
            arcpy.analysis.Intersect([trlx_features, dltb_features], temp_gdtb_trlx, 'NO_FID')
            arcpy.conversion.ExportFeatures(temp_gdtb_trlx, zone_features, where_clause)

        # --- 4. Zonal mean of the raster and polygon areas of the zones ---
        arcpy.sa.ZonalStatisticsAsTable(zone_features, "YL_TS", soil_prop_tif, out_table_mean, "DATA", "MEAN")
        trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(zone_features, ["YL", "TS", "Shape@Area"]))
        if is_filtered_prop and where_clause == "DLBM LIKE '01%' OR DLBM LIKE '02%'":
            print("过滤制图数据仅统计耕地和园地DLBM LIKE '01%' OR '02%'")
        trlx_zhitu_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "MEAN", "COUNT"]))
        trlx_sample_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))

        # Overall sample statistics, taken before any per-type cleaning.
        sample_values = trlx_sample_df[soil_prop_name]
        stat_sample = {
            'min': sample_values.min(),
            'max': sample_values.max(),
            'mean': sample_values.mean(),
            'median': sample_values.median(),
            'count': sample_values.count()
        }

        # --- 5. Build the tables and write both workbooks ---
        final_dataframe, df_mymz = process_data_for_table3(soil_prop_name,trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config, stat_sample)
        write_to_excel_table4(df_mymz, output_excel4_path, prop_config,stat_sample)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

File diff suppressed because it is too large Load Diff

View File