Files
ArcGis_Py/tools/core/acid_stats/土壤类型图酸化统计表.py
2026-04-22 12:27:49 +08:00

630 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.arcgis_field_cal_code import codeblock_cal_shfj
from tools.core.utils.excel_utils import ExcelStyleUtils
from tools.config.custom_sort import yl_order, ts_order
# --- 2. 辅助函数 ---
# 获取要素类各酸化等级面积
def get_acid_area_by_group(target_area_df):
try:
# 转为numpy数组供pandas统计使用
df = target_area_df.copy()
area_by_group = df.groupby("SHFJ")["AREA_MU"].sum()
for key in area_by_group.keys():
area_by_group[key] = area_by_group[key]
return area_by_group.to_dict()
except Exception as e:
print(f"计算面积时出错: {str(e)}")
return None
def apply_adjustment_by_each_level(df, target_area_dict):
"""
对DataFrame中的面积数据按每一个酸化等级独立进行平差。
参数:
df (pd.DataFrame): 包含面积统计的DataFrame。
target_area_dict (dict): 每个酸化等级的目标总面积字典。
例如: {'轻度酸化': 10000.0, '中度酸化': 8000.0, ...}
"""
print("\n开始按每个酸化等级独立进行平差...")
df_adjusted = df.copy()
for level, target_area in target_area_dict.items():
col_name = f'制图面积_亩_{level}'
adjusted_col_name = f'平差后面积_亩_{level}'
if col_name not in df.columns:
print(f"警告: 未找到列 '{col_name}',跳过该等级平差。")
if adjusted_col_name not in df_adjusted.columns:
df_adjusted[adjusted_col_name] = 0 # 创建一个空列
continue
# a. 计算该等级的实际总面积
actual_area = df_adjusted[col_name].sum()
if actual_area > 0:
# b. 计算误差
error = target_area - actual_area
print(f"等级 '{level}': 目标面积={target_area:.2f}, 实际面积={actual_area:.2f}, 误差={error:.2f}")
# c. 按比例分配误差
adjustment = error * (df_adjusted[col_name] / actual_area)
df_adjusted[adjusted_col_name] = df_adjusted[col_name] + adjustment
df_adjusted[adjusted_col_name] = df_adjusted[adjusted_col_name].clip(lower=0)
else:
df_adjusted[adjusted_col_name] = df_adjusted[col_name]
print("按每个酸化等级独立平差完成。")
return df_adjusted
# 获取酸化程度
def get_acidification_degree(delta_ph):
"""根据ΔpH值判断酸化程度"""
if pd.isna(delta_ph) or delta_ph == 0:
return "-"
# 请根据您的实际分级标准调整这里的阈值
if delta_ph > 1.0:
return "重度酸化"
elif 0.5 < delta_ph <= 1.0:
return "中度酸化"
elif 0.3 < delta_ph <= 0.5:
return "轻度酸化"
elif -0.3 <= delta_ph <= 0.3:
return "未酸化"
else: # dPH < -0.3
return "碱化"
# --- 3. 数据处理与分析 均值表---
def process_data_for_table5_5(gdb_path, mean_table_name, sample_table_name):
"""
【最终版 v2】: 增加对制图样点数的处理,以支持加权平均计算。
"""
print("【最终版 v2】开始处理数据...")
def clean_df(df, columns):
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# --- a. 处理样点数据,计算“样点均值” ---
print("--> 步骤1: 计算样点均值...")
sample_table_path = os.path.join(gdb_path, sample_table_name)
sample_fields = ['YL', 'TS', 'dPH']
df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, sample_fields, 'dPH>0.3', skip_nulls=False))
df_samples = clean_df(df_samples, ['YL', 'TS'])
# 按 YL, TS 分组,计算 dPH 的均值
df_sample_means = df_samples.groupby(['YL', 'TS'])['dPH'].mean().reset_index()
df_sample_means.rename(columns={'dPH': '样点均值'}, inplace=True)
print("样点均值计算完成。")
# --- b. 处理制图数据,获取“制图均值”和“制图样点数” ---
print("--> 步骤2: 获取制图均值和样点数...")
mean_table_path = os.path.join(gdb_path, mean_table_name)
mean_fields = ['YL', 'TS', 'MEAN', 'COUNT']
df_map_data = pd.DataFrame(arcpy.da.TableToNumPyArray(mean_table_path, mean_fields, skip_nulls=False))
df_map_data = clean_df(df_map_data, ['YL', 'TS'])
df_map_data.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)
print("制图数据获取完成。")
# --- c. 合并数据 ---
print("--> 步骤3: 合并数据...")
df_skeleton = pd.concat([
df_sample_means[['YL', 'TS']],
df_map_data[['YL', 'TS']]
]).drop_duplicates().reset_index(drop=True)
df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS'], how='left')
# **【核心修改】: 合并整个 df_map_data而不仅仅是均值列**
df_final = pd.merge(df_final, df_map_data, on=['YL', 'TS'], how='left')
# --- d. 计算酸化程度 ---
print("--> 步骤4: 计算酸化程度...")
# **【核心修改】: 在计算酸化程度之前,先过滤掉不展示的行**
# 我们只对 dPH 在酸化范围内 ( > 0.3) 的数据感兴趣
# 但为了计算合计,我们需要保留所有数据,所以这一步只计算,不删除
df_final['酸化程度_样本'] = df_final['样点均值'].apply(get_acidification_degree)
df_final['酸化程度_制图'] = df_final['制图均值'].apply(get_acidification_degree)
# (可选) 按“亚类”和“土属”排序
in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
df_final.sort_values(['YL', 'TS'], inplace=True)
print("数据处理流程完成!")
return df_final
# --- 4. Excel 制表 均值表---
def write_to_excel_table5_5(df, output_path):
"""
将处理好的数据写入格式化的 Excel 文件。
"""
if df.empty:
print("警告: 没有数据可以写入 Excel。")
return
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws.title = "不同类型土壤pH变化统计"
# --- b. 绘制表头 ---
ws.merge_cells('A1:A2'); ws['A1'] = '亚类'
ws.merge_cells('B1:B2'); ws['B1'] = '土属'
ws.merge_cells('C1:F1'); ws['C1'] = 'ΔpH'
ws['C2'] = '样点均值'
ws['D2'] = '酸化程度'
ws['E2'] = '制图均值'
ws['F2'] = '酸化程度'
# --- c. 填充数据 ---
current_row = 3
# **【核心修改】: 先对整个DataFrame进行过滤只保留需要展示的行**
# 只有当“样点酸化程度”或“制图酸化程度”不为“未酸化”、“碱化”或“-”时,才展示该行
acid_levels_to_show = ["轻度酸化", "中度酸化", "重度酸化"]
df_to_write = df[
df['酸化程度_样本'].isin(acid_levels_to_show) |
df['酸化程度_制图'].isin(acid_levels_to_show)
].copy() # 使用 .copy() 避免 SettingWithCopyWarning
for yl, group_yl_df in df_to_write.groupby('YL', observed=True, sort=False):
print(f"正在写入亚类: {yl}...")
yl_start_row = current_row
# 遍历该亚类下的所有“土属”
for _, row_data in group_yl_df.iterrows():
ws.cell(row=current_row, column=2).value = row_data['TS']
# 填充样点数据
sample_mean = row_data.get('样点均值')
if pd.notna(sample_mean):
ws.cell(row=current_row, column=3).value = f"{sample_mean:.2f}" if sample_mean > 0.3 else "-"
ws.cell(row=current_row, column=4).value = row_data.get('酸化程度_样本', '-') if sample_mean > 0.3 else "-"
else:
ws.cell(row=current_row, column=3).value = "-"
ws.cell(row=current_row, column=4).value = "-"
# 填充制图数据
map_mean = row_data.get('制图均值')
if pd.notna(map_mean):
ws.cell(row=current_row, column=5).value = f"{map_mean:.2f}" if map_mean > 0.3 else "-"
ws.cell(row=current_row, column=6).value = row_data.get('酸化程度_制图', '-') if map_mean > 0.3 else "-"
else:
ws.cell(row=current_row, column=5).value = "-"
ws.cell(row=current_row, column=6).value = "-"
current_row += 1
# 计算并写入“合计”行
ws.cell(row=current_row, column=2).value = '合计'
# 计算合计行的均值 (均值的均值)
total_sample_mean = group_yl_df['样点均值'].mean()
if pd.notna(total_sample_mean):
ws.cell(row=current_row, column=3).value = f"{total_sample_mean:.2f}"
ws.cell(row=current_row, column=4).value = get_acidification_degree(total_sample_mean)
else:
ws.cell(row=current_row, column=3).value = "-"
ws.cell(row=current_row, column=4).value = "-"
# b. **【核心修正】: 计算合计行的“制图均值”(加权平均)**
# 准备加权平均的分子和分母
weighted_sum = 0
total_count = 0
# 遍历当前亚类分组中的每一行
for _, row in group_yl_df.iterrows():
mean_val = row.get('制图均值')
count_val = row.get('制图样点数')
# 只有当均值和样点数都存在且有效时,才参与计算
if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
weighted_sum += mean_val * count_val # Σ (mean * count)
total_count += count_val # Σ (count)
# 计算加权平均值
weighted_avg = (weighted_sum / total_count) if total_count > 0 else 0
if weighted_avg > 0:
ws.cell(row=current_row, column=5).value = f"{weighted_avg:.2f}"
ws.cell(row=current_row, column=6).value = get_acidification_degree(weighted_avg)
else:
ws.cell(row=current_row, column=5).value = "-"
ws.cell(row=current_row, column=6).value = "-"
# 合并“亚类”单元格
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
current_row += 1
# --- a. 定义样式 ---
header_font = Font(name='等线', size=11, bold=True)
# --- d. 应用样式和调整列宽 (最终健壮版) ---
max_col_letter = get_column_letter(ws.max_column)
if current_row > 1: # 确保有数据才应用样式
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
print("正在自动调整列宽...")
# 自动调整列宽
ExcelStyleUtils.auto_adjust_column_width(ws)
# --- e. 保存文件 ---
wb.save(output_path)
print("Excel 报告生成成功!")
# --- 2. 数据处理与分析 (面积统计表) ---
def process_data_final(gdb_path, area_table_name, sample_table_name):
"""
【最终修正版 v2】: 先建立统一的层级结构,再分别合并统计结果。
"""
print("【最终修正版 v2】开始处理数据...")
def clean_df(df, columns):
# ... (此函数不变)
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# --- a. 从两个表中提取并建立唯一的 (YL, TS) 层级结构 "骨架" ---
print("--> 步骤1: 建立统一的层级结构...")
sample_table_path = os.path.join(gdb_path, sample_table_name)
area_table_path = os.path.join(gdb_path, area_table_name)
df_samples_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['YL', 'TS'], skip_nulls=False))
df_area_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['YL', 'TS'], skip_nulls=False))
# 清理并合并两个表中的 (YL, TS) 组合
df_samples_raw = clean_df(df_samples_raw, ['YL', 'TS'])
df_area_raw = clean_df(df_area_raw, ['YL', 'TS'])
# 使用 concat 连接两个DataFrame然后用 drop_duplicates 去除重复的组合
df_skeleton = pd.concat([df_samples_raw, df_area_raw]).drop_duplicates().reset_index(drop=True)
if df_skeleton.empty:
print("警告: 无法从源数据中建立任何有效的 (YL, TS) 层级结构。")
return pd.DataFrame(), {}
print(f"已建立包含 {len(df_skeleton)} 个唯一土壤类型的层级结构。")
# --- b. 独立统计样点数据 ---
print("--> 步骤2: 独立统计样点数据...")
df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['TS', 'YL', 'dPH'], skip_nulls=False))
df_samples = clean_df(df_samples, ['YL', 'TS'])
if not df_samples.empty:
bins = [-np.inf, -0.3, 0.3, 0.5, 1.0, np.inf]
labels = ["碱化", "未酸化", "轻度酸化", "中度酸化", "重度酸化"]
df_samples['SHFJ'] = pd.cut(df_samples['dPH'], bins=bins, labels=labels, right=True)
sample_counts = df_samples.groupby(['YL', 'TS', 'SHFJ'], observed=False).size().reset_index(name='样点数')
ts_total_samples = sample_counts.groupby(['YL', 'TS'])['样点数'].transform('sum')
sample_counts['样点占比'] = (sample_counts['样点数'] / ts_total_samples) * 100
df_sample_stats = sample_counts.pivot_table(
index=['YL', 'TS'], columns='SHFJ', values=['样点数', '样点占比'], fill_value=0, observed=False
).reset_index()
df_sample_stats.columns = [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in df_sample_stats.columns]
# 将样点统计结果合并到骨架上
df_final = pd.merge(df_skeleton, df_sample_stats, on=['YL', 'TS'], how='left')
else:
df_final = df_skeleton.copy()
# --- c. 独立统计面积数据 ---
print("--> 步骤3: 独立统计面积数据...")
df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['TS', 'YL', 'SHFJ', 'AREA'], skip_nulls=False))
df_area = clean_df(df_area, ['YL', 'TS'])
if not df_area.empty:
df_area['制图面积_亩'] = df_area['AREA'] * 0.0015
ts_total_area = df_area.groupby(['YL', 'TS'])['制图面积_亩'].transform('sum')
df_area['面积占比'] = (df_area['制图面积_亩'] / ts_total_area) * 100
df_area_stats = df_area.pivot_table(
index=['YL', 'TS'], columns='SHFJ', values=['制图面积_亩', '面积占比'], fill_value=0
).reset_index()
df_area_stats.columns = [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in df_area_stats.columns]
# 将面积统计结果合并到 df_final 上
# 注意,这里我们合并到已经包含样点数据的 df_final 上
df_final = pd.merge(df_final, df_area_stats, on=['YL', 'TS'], how='left')
# --- d. 最后清理和构建映射 ---
df_final.fillna(0, inplace=True)
print("--> 步骤4: 自动构建层级结构...")
in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
df_final.sort_values(['YL', 'TS'], inplace=True)
dynamic_soil_mapping = df_final.groupby('YL', observed=True)['TS'].unique().apply(list).to_dict()
# for yl in dynamic_soil_mapping:
# dynamic_soil_mapping[yl].sort()
print("数据处理流程完成!")
return df_final, dynamic_soil_mapping
# --- 3. Excel 制表 面积统计表 ---
def write_to_excel(df, soil_mapping, output_path):
"""
【最终修正版】: 将处理好的数据写入格式化的 Excel 文件。
"""
if df.empty:
print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
return
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws.title = "不同类型土壤酸化程度统计"
# --- b. 绘制表头 (不变) ---
ws.merge_cells('A1:A2'); ws['A1'] = '亚类'
ws.merge_cells('B1:B2'); ws['B1'] = '土属'
acid_levels = ['轻度酸化', '中度酸化', '重度酸化']
all_possible_levels = ['碱化', '未酸化', '轻度酸化', '中度酸化', '重度酸化']
acid_level_headers = ['轻度酸化(0.3<ΔpH≤0.5)', '中度酸化(0.5<ΔpH≤1.0)', '重度酸化(ΔpH>1.0)']
col_start = 3
for header in acid_level_headers:
ws.merge_cells(start_row=1, start_column=col_start, end_row=1, end_column=col_start + 3)
ws.cell(row=1, column=col_start).value = header
ws.cell(row=2, column=col_start).value = '样点数/个'
ws.cell(row=2, column=col_start + 1).value = '占比/%'
ws.cell(row=2, column=col_start + 2).value = '制图面积/亩'
ws.cell(row=2, column=col_start + 3).value = '占比/%'
col_start += 4
# --- c. 填充数据 (完全重构的逻辑) ---
current_row = 3
# 使用 .groupby('YL', sort=False) 来保证我们之前设置的排序顺序
for yl, ts_list in soil_mapping.items():
# **【关键】** group_yl 是一个只包含当前亚类数据的子DataFrame
# 我们可以安全地在这个子DataFrame上进行迭代和计算
print(f"正在写入亚类: {yl}...")
yl_start_row = current_row
# 筛选出当前亚类的所有数据
group_yl_df = df[df['YL'] == yl]
# 1. 遍历该亚类下的所有“土属”并写入数据
for ts in ts_list:
ws.cell(row=current_row, column=2).value = ts
# 在子集中查找当前土属的数据行
row_data = group_yl_df[group_yl_df['TS'] == ts]
# --- 填充单元格的逻辑开始 ---
col_start = 3 # 从第 C 列开始填充
# 检查是否找到了该土属的数据
if not row_data.empty:
# 如果找到了数据 (row_data 不为空),我们就获取这一行的数据
# .iloc[0] 获取第一行(也是唯一一行)的数据,作为一个 Series 对象
data_series = row_data.iloc[0]
# 遍历每一个酸化等级,填充对应的四列数据
for level in acid_levels:
# 1. 构建要从 data_series 中查找的列名
sample_col = f'样点数_{level}'
sample_pct_col = f'样点占比_{level}'
area_col = f'平差后面积_亩_{level}'
area_pct_col = f'面积占比_{level}'
# 2. 从 data_series 中安全地获取值
# 使用 .get(key, default_value) 的好处是,如果列名不存在,它会返回默认值(0),而不会报错
sample_val = data_series.get(sample_col, 0)
sample_pct_val = data_series.get(sample_pct_col, 0)
area_val = data_series.get(area_col, 0)
area_pct_val = data_series.get(area_pct_col, 0)
# 3. 将获取到的值填入单元格
# - 对于数值我们判断它是否大于0。如果是就填入数值否则填入 "-"
# - 对于样点数,我们将其转为整数
# - 对于占比和面积,我们保留两位小数
# 样点数/个
ws.cell(row=current_row, column=col_start).value = int(sample_val) if sample_val > 0 else "-"
# 占比/%
ws.cell(row=current_row, column=col_start + 1).value = f"{sample_pct_val:.2f}%" if sample_val > 0 else "-"
# 制图面积/亩
ws.cell(row=current_row, column=col_start + 2).number_format = "0.00"
ws.cell(row=current_row, column=col_start + 2).value = f"{area_val:.0f}" if area_val > 0 else "-"
# 占比/%
ws.cell(row=current_row, column=col_start + 3).value = f"{area_pct_val:.2f}%" if area_val > 0 else "-"
# 移动到下一个酸化等级的起始列
col_start += 4
else:
# 如果没有找到该土属的数据 (row_data 为空)
# 这意味着该土属在源数据中不存在任何样点或面积信息
# 我们将整行所有统计单元格都填充为 "-"
# acid_levels 列表包含3个等级每个等级4列总共12列
for _ in range(len(acid_levels) * 4):
ws.cell(row=current_row, column=col_start).value = "-"
col_start += 1
# --- 填充单元格的逻辑结束 ---
# 完成一行填充后行号加1为下一行做准备
current_row += 1
# 2. 计算并写入这个亚类的“合计”行
ws.cell(row=current_row, column=2).value = '合计'
# 计算总样点数和总面积,仅针对当前 group_yl
yl_grand_total_samples = 0
for lvl in all_possible_levels:
if f'样点数_{lvl}' in group_yl_df:
yl_grand_total_samples += group_yl_df[f'样点数_{lvl}'].sum()
yl_grand_total_area = 0
for lvl in all_possible_levels:
if f'制图面积_亩_{lvl}' in group_yl_df:
yl_grand_total_area += group_yl_df[f'制图面积_亩_{lvl}'].sum()
col_start = 3
for level in acid_levels:
sample_sum = group_yl_df.get(f'样点数_{level}', 0).sum()
col_name = f'制图面积_亩_{level}'
area_sum = group_yl_df[col_name].sum() if col_name in group_yl_df else 0
# area_sum = group_yl_df.get(f'平差后面积_亩_{level}', 0).sum()
sample_perc = (sample_sum / yl_grand_total_samples * 100) if yl_grand_total_samples > 0 else 0
area_perc = (area_sum / yl_grand_total_area * 100) if yl_grand_total_area > 0 else 0
ws.cell(row=current_row, column=col_start).value = int(sample_sum) if sample_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 1).value = f"{sample_perc:.2f}%" if sample_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 2).value = f"{area_sum:.0f}" if area_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 3).value = f"{area_perc:.2f}%" if area_sum > 0 else "-"
col_start += 4
# 3. 合并“亚类”单元格
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
current_row += 1
# --- a. 定义样式 (不变) ---
header_font = Font(name='等线', size=11, bold=True)
# d. 应用样式和调整列宽
max_col = 2 + len(acid_levels) * 4
if current_row > 1: # 确保有数据才应用样式
ExcelStyleUtils.set_style(ws, f'A1:{get_column_letter(max_col)}{current_row-1}')
ExcelStyleUtils.set_style(ws, f'A1:{get_column_letter(max_col)}2', header_font)
# 调整列宽
ExcelStyleUtils.auto_adjust_column_width(ws)
# --- e. 保存文件 ---
wb.save(output_path)
print("Excel 报告生成成功!")
def main(gdb_path, trlx_polygon, sh_ph_polygon, ph_raster, output_path, target_areas_df):
try:
# --- 1. 用户配置 ---
sample_table_name = "历史样点PH信息_Table" # 图2: 样点信息表名
# 输出配置
output_excel_path = os.path.join(output_path, "土壤类型酸化统计表.xlsx") # 生成的Excel报告文件路径
# 设置工作空间和变量
arcpy.env.workspace = gdb_path
arcpy.env.overwriteOutput = True
in_zone_feature = trlx_polygon # 土壤类型图
# in_class_feature = sh_ph_polygon # 已重分类好的酸化PH图层
in_class_feature = "最小面积统计单元"
in_value_raster = ph_raster # 酸化PH栅格
dltb_ph_statstable = "土地利用类型_酸化面积表" # 土壤类型_酸化面积表gdb table
out_table_area = r"土壤类型_酸化面积表" # 输出的交集表名
out_table_mean = r"土壤类型_酸化均值表" # 输出的均值表名
print("开始处理数据...")
if not arcpy.Exists(out_table_area):
# 判断输入表是否存在SHFJ字段
try:
arcpy.management.CalculateField(in_class_feature, "SHFJ", "calculate_shfj(!gridcode!)", "PYTHON3", codeblock_cal_shfj)
except Exception as e:
print(f"计算SHFJ字段时发生错误: {e}")
# 1.用arcpy.analysis.TabulateIntersection进行交集制表
arcpy.analysis.TabulateIntersection(
in_zone_feature,
["TS", "YL"],
in_class_feature,
out_table_area,
"SHFJ",
out_units="SQUARE_METERS",
)
if not arcpy.Exists(out_table_mean):
# 判断输入表是否存在YL_TS字段
if not arcpy.ListFields(in_zone_feature, "YL_TS"):
# 如果不存在,则添加该字段
arcpy.management.AddField(in_zone_feature, "YL_TS", "TEXT")
# 计算YL_TS字段的值
arcpy.management.CalculateField(in_zone_feature,"YL_TS","!YL! + '_' + !TS!","PYTHON3")
# 2.用arcpy.sa.ZonalStatisticsAsTable进行区域统计
mean_table = arcpy.sa.ZonalStatisticsAsTable(
in_zone_feature, "YL_TS", in_value_raster, out_table_mean, "DATA", "MEAN"
)
# 2.1 添加土壤类型字段并计算
arcpy.management.AddFields(
out_table_mean,
[["YL", "TEXT"],["TS", "TEXT"]],
)
arcpy.management.CalculateField(mean_table, "YL", "!YL_TS!.split('_')[0]", "PYTHON3")
arcpy.management.CalculateField(mean_table, "TS", "!YL_TS!.split('_')[1]", "PYTHON3")
# 生成表5.4的面积统计Excel报告
final_dataframe, soil_structure = process_data_final(gdb_path, out_table_area, sample_table_name)
# 统计地类图斑酸化总面积亩
each_acid_area = get_acid_area_by_group(target_areas_df)
print(f"容县土壤类型图斑总 acid 总面积(亩):{each_acid_area}")
# 执行平差计算
if each_acid_area:
adjusted_dataframe = apply_adjustment_by_each_level(final_dataframe, each_acid_area)
print("使用平差值进行修正!")
write_to_excel(adjusted_dataframe, soil_structure, output_excel_path)
else:
print("未使用平差值进行修正!")
write_to_excel(final_dataframe, soil_structure, output_excel_path)
# 生成表5.4的均值统计Excel报告
final_mean_dataframe = process_data_for_table5_5(gdb_path, out_table_mean, sample_table_name)
write_to_excel_table5_5(final_mean_dataframe, output_excel_path.replace(".xlsx", "_mean.xlsx"))
# adjusted_dataframe.to_csv(output_excel_path.replace(".xlsx", "_adjusted.csv"), index=False)
except Exception as e:
print(f"\n处理过程中发生严重错误: {e}")
import traceback
traceback.print_exc()
finally:
import gc
gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()