Files
ArcGis_Py/tools/core/acid_stats/土地利用类型酸化统计表.py
2026-04-22 12:27:49 +08:00

610 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from collections import OrderedDict
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.arcgis_field_cal_code import codeblock_cal_shfj, codeblock_dltb_ejdl, codeblock_dltb_yjdl
from tools.core.utils.excel_utils import ExcelStyleUtils
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# --- 2. 辅助函数 ---
# 等级计算
def get_acidification_degree(delta_ph):
"""根据ΔpH值判断酸化程度"""
if pd.isna(delta_ph) or delta_ph == 0:
return "-"
# 请根据您的实际分级标准调整这里的阈值
if delta_ph > 1.0:
return "重度酸化"
elif 0.5 < delta_ph <= 1.0:
return "中度酸化"
elif 0.3 < delta_ph <= 0.5:
return "轻度酸化"
elif -0.3 <= delta_ph <= 0.3:
return "未酸化"
else: # dPH < -0.3
return "碱化"
# --- 3. 数据处理与分析 均值---
def process_data_for_table5_3(gdb_path, mean_table_name, sample_table_name):
"""
【最终版 v2】: 增加对制图样点数的处理,以支持加权平均计算。
"""
print("【最终版 v2】开始处理数据...")
def clean_df(df, columns):
# ... (此函数不变)
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# --- a. 处理样点数据,计算“样点均值” ---
print("--> 步骤1: 计算样点均值...")
sample_table_path = os.path.join(gdb_path, sample_table_name)
sample_fields = ['YJDL', 'EJDL', 'dPH']
df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, sample_fields,'dPH>0.3', skip_nulls=False))
df_samples = clean_df(df_samples, ['YJDL', 'EJDL'])
# 按 YJDL, EJDL 分组,计算 dPH 的均值
df_sample_means = df_samples.groupby(['YJDL', 'EJDL'])['dPH'].mean().reset_index()
df_sample_means.rename(columns={'dPH': '样点均值'}, inplace=True)
print("样点均值计算完成。")
# --- b. 处理制图数据,获取“制图均值”和“制图样点数” ---
print("--> 步骤2: 获取制图均值和样点数...")
mean_table_path = os.path.join(gdb_path, mean_table_name)
# **【核心修改】: 增加读取 COUNT 字段**
mean_fields = ['YJDL', 'EJDL', 'MEAN', 'COUNT']
df_map_data = pd.DataFrame(arcpy.da.TableToNumPyArray(mean_table_path, mean_fields, skip_nulls=False))
df_map_data = clean_df(df_map_data, ['YJDL', 'EJDL'])
df_map_data.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)
print("制图数据获取完成。")
# --- c. 合并数据 ---
print("--> 步骤3: 合并数据...")
df_skeleton = pd.concat([
df_sample_means[['YJDL', 'EJDL']],
df_map_data[['YJDL', 'EJDL']]
]).drop_duplicates().reset_index(drop=True)
df_final = pd.merge(df_skeleton, df_sample_means, on=['YJDL', 'EJDL'], how='left')
# **【核心修改】: 合并整个 df_map_data而不仅仅是均值列**
df_final = pd.merge(df_final, df_map_data, on=['YJDL', 'EJDL'], how='left')
# --- d. 计算酸化程度 ---
print("--> 步骤4: 计算酸化程度...")
# **【核心修改】: 在计算酸化程度之前,先过滤掉不展示的行**
# 我们只对 dPH 在酸化范围内 ( > 0.3) 的数据感兴趣
# 但为了计算合计,我们需要保留所有数据,所以这一步只计算,不删除
df_final['酸化程度_样本'] = df_final['样点均值'].apply(get_acidification_degree)
df_final['酸化程度_制图'] = df_final['制图均值'].apply(get_acidification_degree)
# (可选) 按“一级地类”和“二级地类”排序
in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
df_final.sort_values(['YJDL', 'EJDL'], inplace=True)
print("数据处理流程完成!")
return df_final
# --- 4. Excel 制表 均值---
def write_to_excel_table5_3(df, output_path):
"""
将处理好的数据写入格式化的 Excel 文件。
"""
if df.empty:
print("警告: 没有数据可以写入 Excel。")
return
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws.title = "不同土地利用类型pH变化统计"
# --- b. 绘制表头 ---
ws.merge_cells('A1:B1'); ws['A1'] = '土地利用类型'
ws.merge_cells('C1:F1'); ws['C1'] = 'ΔpH'
ws['A2'] = '一级'
ws['B2'] = '二级'
ws['C2'] = '样点均值'
ws['D2'] = '酸化程度'
ws['E2'] = '制图均值'
ws['F2'] = '酸化程度'
# --- c. 填充数据 ---
current_row = 3
# **【核心修改】: 先对整个DataFrame进行过滤只保留需要展示的行**
# 只有当“样点酸化程度”或“制图酸化程度”不为“未酸化”、“碱化”或“-”时,才展示该行
acid_levels_to_show = ["轻度酸化", "中度酸化", "重度酸化"]
df_to_write = df[
df['酸化程度_样本'].isin(acid_levels_to_show) |
df['酸化程度_制图'].isin(acid_levels_to_show)
].copy() # 使用 .copy() 避免 SettingWithCopyWarning
for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
print(f"正在写入一级地类: {yl}...")
yl_start_row = current_row
# 遍历该一级地类下的所有“二级地类”
for _, row_data in group_yl_df.iterrows():
ws.cell(row=current_row, column=2).value = row_data['EJDL']
# 填充样点数据
sample_mean = row_data.get('样点均值')
if pd.notna(sample_mean):
ws.cell(row=current_row, column=3).value = f"{sample_mean:.2f}" if sample_mean > 0.3 else "-"
ws.cell(row=current_row, column=4).value = row_data.get('酸化程度_样本', '-') if sample_mean > 0.3 else "-"
else:
ws.cell(row=current_row, column=3).value = "-"
ws.cell(row=current_row, column=4).value = "-"
# 填充制图数据
map_mean = row_data.get('制图均值')
if pd.notna(map_mean):
ws.cell(row=current_row, column=5).value = f"{map_mean:.2f}" if map_mean > 0.3 else "-"
ws.cell(row=current_row, column=6).value = row_data.get('酸化程度_制图', '-') if map_mean > 0.3 else "-"
else:
ws.cell(row=current_row, column=5).value = "-"
ws.cell(row=current_row, column=6).value = "-"
current_row += 1
# 计算并写入“合计”行
if ws.cell(row=current_row-1, column=2).value in ["林地", "草地", "其他"]:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
ws.cell(row=yl_start_row, column=1).value = yl
continue
ws.cell(row=current_row, column=2).value = '合计'
# 计算合计行的均值 (均值的均值)
total_sample_mean = group_yl_df['样点均值'].mean()
if pd.notna(total_sample_mean):
ws.cell(row=current_row, column=3).value = f"{total_sample_mean:.2f}"
ws.cell(row=current_row, column=4).value = get_acidification_degree(total_sample_mean)
else:
ws.cell(row=current_row, column=3).value = "-"
ws.cell(row=current_row, column=4).value = "-"
# b. **【核心修正】: 计算合计行的“制图均值”(加权平均)**
# 准备加权平均的分子和分母
weighted_sum = 0
total_count = 0
# 遍历当前一级地类分组中的每一行
for _, row in group_yl_df.iterrows():
mean_val = row.get('制图均值')
count_val = row.get('制图样点数')
# 只有当均值和样点数都存在且有效时,才参与计算
if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
weighted_sum += mean_val * count_val # Σ (mean * count)
total_count += count_val # Σ (count)
# 计算加权平均值
weighted_avg = (weighted_sum / total_count) if total_count > 0 else 0
if weighted_avg > 0:
ws.cell(row=current_row, column=5).value = f"{weighted_avg:.2f}"
ws.cell(row=current_row, column=6).value = get_acidification_degree(weighted_avg)
else:
ws.cell(row=current_row, column=5).value = "-"
ws.cell(row=current_row, column=6).value = "-"
# 合并“一级地类”单元格
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
current_row += 1
# --- a. 定义样式 ---
header_font = Font(name='等线', size=11, bold=True)
# --- d. 应用样式和调整列宽 ---
max_col_letter = get_column_letter(ws.max_column)
if current_row > 1: # 确保有数据才应用样式
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
print("正在自动调整列宽...")
# 自动调整列宽
ExcelStyleUtils.auto_adjust_column_width(ws)
# --- e. 保存文件 ---
wb.save(output_path)
print("Excel 报告生成成功!")
# --- 2. 数据处理与分析 (使用 Pandas) ---
def process_data_for_table5_4(gdb_path, area_table_name, sample_table_name, target_area_dict):
"""
【最终修正版 v2】: 先建立统一的层级结构,再分别合并统计结果。
"""
print("【最终修正版 v2】开始处理数据...")
def clean_df(df, columns):
# ... (此函数不变)
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# --- a. 从两个表中提取并建立唯一的 (YJDL, EJDL) 层级结构 "骨架" ---
print("--> 步骤1: 建立统一的层级结构...")
sample_table_path = os.path.join(gdb_path, sample_table_name)
area_table_path = os.path.join(gdb_path, area_table_name)
df_samples_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['YJDL', 'EJDL'], skip_nulls=False))
df_area_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['YJDL', 'EJDL'], skip_nulls=False))
# 清理并合并两个表中的 (YJDL, EJDL) 组合
df_samples_raw = clean_df(df_samples_raw, ['YJDL', 'EJDL'])
df_area_raw = clean_df(df_area_raw, ['YJDL', 'EJDL'])
# 使用 concat 连接两个DataFrame然后用 drop_duplicates 去除重复的组合
df_skeleton = pd.concat([df_samples_raw, df_area_raw]).drop_duplicates().reset_index(drop=True)
if df_skeleton.empty:
print("警告: 无法从源数据中建立任何有效的 (YJDL, EJDL) 层级结构。")
return pd.DataFrame(), {}
print(f"已建立包含 {len(df_skeleton)} 个唯一土壤类型的层级结构。")
# --- b. 独立统计样点数据 ---
print("--> 步骤2: 独立统计样点数据...")
df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['EJDL', 'YJDL', 'dPH'], skip_nulls=False))
df_samples = clean_df(df_samples, ['YJDL', 'EJDL'])
if not df_samples.empty:
# ... (统计逻辑不变)
bins = [-np.inf, -0.3, 0.3, 0.5, 1.0, np.inf]
labels = ["碱化", "未酸化", "轻度酸化", "中度酸化", "重度酸化"]
df_samples['SHFJ'] = pd.cut(df_samples['dPH'], bins=bins, labels=labels, right=True)
sample_counts = df_samples.groupby(['YJDL', 'EJDL', 'SHFJ'], observed=False).size().reset_index(name='样点数')
ts_total_samples = sample_counts.groupby(['YJDL', 'EJDL'])['样点数'].transform('sum')
sample_counts['样点占比'] = (sample_counts['样点数'] / ts_total_samples) * 100
df_sample_stats = sample_counts.pivot_table(
index=['YJDL', 'EJDL'], columns='SHFJ', values=['样点数', '样点占比'], fill_value=0, observed=False
).reset_index()
df_sample_stats.columns = [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in df_sample_stats.columns]
# 将样点统计结果合并到骨架上
df_final = pd.merge(df_skeleton, df_sample_stats, on=['YJDL', 'EJDL'], how='left')
else:
df_final = df_skeleton.copy()
# --- c. 独立统计面积数据 ---
print("--> 步骤3: 独立统计面积数据...")
df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['EJDL', 'YJDL', 'SHFJ', 'AREA'], skip_nulls=False))
df_area = clean_df(df_area, ['YJDL', 'EJDL'])
if not df_area.empty:
# 计算平差系数
landuse_types = {'耕地':'01', '园地':'02', '林地':'03', '草地':'04', '其他':'12'}
df_area['AREA_MU'] = df_area['AREA'] * 0.0015
yjdl_area = df_area.groupby(['YJDL'])['AREA_MU'].sum().reset_index()
yjdl_area.columns = ['YJDL', 'ORIGINAL_TOTAL_MU']
adjustment_factors = []
for _, row in yjdl_area.iterrows():
yjdl = row['YJDL']
original_total = row['ORIGINAL_TOTAL_MU']
target_total = target_area_dict.get(landuse_types[yjdl], original_total) # 如果没有指定,就用原始面积
adjustment_factor = target_total / original_total
adjustment_factors.append({
'YJDL': yjdl,
'原始总面积_亩': original_total,
'目标总面积_亩': target_total,
'平差系数': adjustment_factor
})
factor_df = pd.DataFrame(adjustment_factors)
# 4. 对每个二级地类应用平差系数
# 合并原始数据和平差系数
df_with_factors = df_area.merge(factor_df[['YJDL', '平差系数']], on='YJDL')
df_with_factors['制图面积_亩'] = df_with_factors['AREA_MU'] * df_with_factors['平差系数']
ts_total_area = df_with_factors.groupby(['YJDL', 'EJDL'])['制图面积_亩'].transform('sum')
df_with_factors['面积占比'] = (df_with_factors['制图面积_亩'] / ts_total_area) * 100
df_area_stats = df_with_factors.pivot_table(
index=['YJDL', 'EJDL'], columns='SHFJ', values=['制图面积_亩', '面积占比'], fill_value=0
).reset_index()
df_area_stats.columns = [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in df_area_stats.columns]
# 将面积统计结果合并到 df_final 上
# 注意,这里我们合并到已经包含样点数据的 df_final 上
df_final = pd.merge(df_final, df_area_stats, on=['YJDL', 'EJDL'], how='left')
# --- d. 最后清理和构建映射 ---
df_final.fillna(0, inplace=True)
print("--> 步骤4: 自动构建层级结构...")
dynamic_soil_mapping = df_final.groupby('YJDL')['EJDL'].unique().apply(list).to_dict()
dynamic_soil_mapping = OrderedDict(sorted(dynamic_soil_mapping.items(),key=lambda item: yjdl_order.index(item[0])))
in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
for yl in dynamic_soil_mapping:
# dynamic_soil_mapping[yl].sort()
dynamic_soil_mapping[yl] = sorted( dynamic_soil_mapping[yl], key=lambda x: in_ejdl_order.index(x))
print("数据处理流程完成!")
return df_final, dynamic_soil_mapping
# --- 3. Excel 制表 面积---
def write_to_excel_table5_4(df, soil_mapping, output_path):
"""
【最终修正版】: 将处理好的数据写入格式化的 Excel 文件。
"""
if df.empty:
print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
return
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws.title = "不同类型土壤酸化程度统计"
# --- b. 绘制表头 (不变) ---
ws.merge_cells('A1:B1'); ws['A1'] = '土地利用类型'
ws['A2'] = '一级'
ws['B2'] = '二级'
acid_levels = ['轻度酸化', '中度酸化', '重度酸化']
all_possible_levels = ['碱化', '未酸化', '轻度酸化', '中度酸化', '重度酸化']
acid_level_headers = ['轻度酸化(0.3<ΔpH≤0.5)', '中度酸化(0.5<ΔpH≤1.0)', '重度酸化(ΔpH>1.0)']
col_start = 3
for header in acid_level_headers:
ws.merge_cells(start_row=1, start_column=col_start, end_row=1, end_column=col_start + 3)
ws.cell(row=1, column=col_start).value = header
ws.cell(row=2, column=col_start).value = '样点数/个'
ws.cell(row=2, column=col_start + 1).value = '占比/%'
ws.cell(row=2, column=col_start + 2).value = '制图面积/亩'
ws.cell(row=2, column=col_start + 3).value = '占比/%'
col_start += 4
# --- c. 填充数据 (完全重构的逻辑) ---
current_row = 3
# 使用 .groupby('YJDL', sort=False) 来保证我们之前设置的排序顺序
for yl, ts_list in soil_mapping.items():
# **【关键】** group_yl 是一个只包含当前一级地类数据的子DataFrame
# 我们可以安全地在这个子DataFrame上进行迭代和计算
print(f"正在写入一级地类: {yl}...")
yl_start_row = current_row
# 筛选出当前一级地类的所有数据
group_yl_df = df[df['YJDL'] == yl]
# 1. 遍历该一级地类下的所有“二级地类”并写入数据
for ts in ts_list:
ws.cell(row=current_row, column=2).value = ts
# 在子集中查找当前二级地类的数据行
row_data = group_yl_df[group_yl_df['EJDL'] == ts]
# --- 填充单元格的逻辑开始 ---
col_start = 3 # 从第 C 列开始填充
# 检查是否找到了该土属的数据
if not row_data.empty:
# 如果找到了数据 (row_data 不为空),我们就获取这一行的数据
# .iloc[0] 获取第一行(也是唯一一行)的数据,作为一个 Series 对象
data_series = row_data.iloc[0]
# 遍历每一个酸化等级,填充对应的四列数据
for level in acid_levels:
# 1. 构建要从 data_series 中查找的列名
sample_col = f'样点数_{level}'
sample_pct_col = f'样点占比_{level}'
area_col = f'制图面积_亩_{level}'
area_pct_col = f'面积占比_{level}'
# 2. 从 data_series 中安全地获取值
# 使用 .get(key, default_value) 的好处是,如果列名不存在,它会返回默认值(0),而不会报错
sample_val = data_series.get(sample_col, 0)
sample_pct_val = data_series.get(sample_pct_col, 0)
area_val = data_series.get(area_col, 0)
area_pct_val = data_series.get(area_pct_col, 0)
# 3. 将获取到的值填入单元格
# - 对于数值我们判断它是否大于0。如果是就填入数值否则填入 "-"
# - 对于样点数,我们将其转为整数
# - 对于占比和面积,我们保留两位小数
# 样点数/个
ws.cell(row=current_row, column=col_start).value = int(sample_val) if sample_val > 0 else "-"
# 占比/%
ws.cell(row=current_row, column=col_start + 1).value = f"{sample_pct_val:.2f}%" if sample_val > 0 else "-"
# 制图面积/万亩
ws.cell(row=current_row, column=col_start + 2).value = f"{area_val:.0f}" if area_val > 0 else "-"
# 占比/%
ws.cell(row=current_row, column=col_start + 3).value = f"{area_pct_val:.2f}%" if area_val > 0 else "-"
# 移动到下一个酸化等级的起始列
col_start += 4
else:
# 如果没有找到该土属的数据 (row_data 为空)
# 这意味着该土属在源数据中不存在任何样点或面积信息
# 我们将整行所有统计单元格都填充为 "-"
# acid_levels 列表包含3个等级每个等级4列总共12列
for _ in range(len(acid_levels) * 4):
ws.cell(row=current_row, column=col_start).value = "-"
col_start += 1
# --- 填充单元格的逻辑结束 ---
# 完成一行填充后行号加1为下一行做准备
current_row += 1
# 2. 计算并写入这个一级地类的“合计”行
if ws.cell(row=current_row-1, column=2).value in ["林地","草地", "其他"]:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
ws.cell(row=yl_start_row, column=1).value = yl
continue
ws.cell(row=current_row, column=2).value = '合计'
# 计算总样点数和总面积,仅针对当前 group_yl
yl_grand_total_samples = 0
for lvl in all_possible_levels:
if f'样点数_{lvl}' in group_yl_df:
yl_grand_total_samples += group_yl_df[f'样点数_{lvl}'].sum()
yl_grand_total_area = 0
for lvl in all_possible_levels:
if f'制图面积_亩_{lvl}' in group_yl_df:
yl_grand_total_area += group_yl_df[f'制图面积_亩_{lvl}'].sum()
col_start = 3
for level in acid_levels:
sample_sum = group_yl_df.get(f'样点数_{level}', 0).sum()
col_name = f'制图面积_亩_{level}'
area_sum = group_yl_df[col_name].sum() if col_name in group_yl_df else 0
# area_sum = group_yl_df.get(f'制图面积_亩_{level}', 0).sum()
sample_perc = (sample_sum / yl_grand_total_samples * 100) if yl_grand_total_samples > 0 else 0
area_perc = (area_sum / yl_grand_total_area * 100) if yl_grand_total_area > 0 else 0
ws.cell(row=current_row, column=col_start).value = int(sample_sum) if sample_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 1).value = f"{sample_perc:.2f}%" if sample_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 2).value = f"{area_sum:.0f}" if area_sum > 0 else "-"
ws.cell(row=current_row, column=col_start + 3).value = f"{area_perc:.2f}%" if area_sum > 0 else "-"
col_start += 4
# 3. 合并“一级地类”单元格
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
current_row += 1
# --- a. 定义样式 (不变) ---
header_font = Font(name='等线', size=11, bold=True)
# --- d. 应用样式和调整列宽 (最终健壮版) ---
max_col_letter = get_column_letter(ws.max_column)
if current_row > 1: # 确保有数据才应用样式
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
print("正在自动调整列宽...")
# 调整列宽
ExcelStyleUtils.auto_adjust_column_width(ws)
# --- e. 保存文件 ---
wb.save(output_path)
print("Excel 报告生成成功!")
def main(gdb_path:str, ph_features:str,dltb_class_feature:str, shph_tif:str, output_path:str,target_areas_dict:dict):
try:
# --- 1. 用户配置 ---
# 输出配置
output_excel_path = os.path.join(output_path, "土地利用类型酸化统计表.xlsx") # 生成的Excel报告文件路径
# 设置工作空间和变量
arcpy.env.workspace = gdb_path
arcpy.env.overwriteOutput = True
sample_table_name = "历史样点PH信息_Table" # 图2: 样点信息表名
in_zone_feature = dltb_class_feature # 地类图斑
in_class_feature = ph_features # 已重分类好的酸化PH图层
in_value_raster = shph_tif # 赋值栅格,酸化PH栅格
out_table_area = r"土地利用类型_酸化面积表" # 输出的面积统计表名
out_table_mean = r"土地利用类型_酸化均值表" # 输出的均值表名
print("开始处理数据...")
if not arcpy.Exists(out_table_area):
# 判断输入表是否存在SHFJ字段
try:
if not arcpy.ListFields(in_zone_feature, "EJDL"):
arcpy.management.CalculateField(in_zone_feature, "EJDL", "calculate_ejdl(!DLBM!,!DLMC!)", "PYTHON3", codeblock_dltb_ejdl)
arcpy.management.CalculateField(in_zone_feature, "YJDL", "calculate_yjdl(!DLBM!)", "PYTHON3", codeblock_dltb_yjdl)
if not arcpy.ListFields(in_class_feature, "SHFJ"):
arcpy.management.CalculateField(in_class_feature, "SHFJ", "calculate_shfj(!gridcode!)", "PYTHON3", codeblock_cal_shfj)
except Exception as e:
print(f"计算SHFJ字段时发生错误: {e}")
# 拿到地类图斑的坐标系
desc = arcpy.Describe(in_zone_feature)
spatial_ref = desc.spatialReference
# 1.用arcpy.analysis.TabulateIntersection进行交集制表,面积使用地类图斑投影坐标系下面积
with arcpy.EnvManager(outputCoordinateSystem=spatial_ref):
arcpy.analysis.TabulateIntersection(
in_zone_feature,
["YJDL", "EJDL"],
in_class_feature,
out_table_area,
"SHFJ",
out_units="SQUARE_METERS",
)
if not arcpy.Exists(out_table_mean):
# 判断输入表是否存在YJDL_EJDL字段
if not arcpy.ListFields(in_zone_feature, "YJDL_EJDL"):
# 如果不存在,则添加该字段
arcpy.management.AddField(in_zone_feature, "YJDL_EJDL", "TEXT")
# 计算YJDL_EJDL字段的值
arcpy.management.CalculateField(in_zone_feature,"YJDL_EJDL","!YJDL! + '_' + !EJDL!","PYTHON3")
# 2.用arcpy.sa.ZonalStatisticsAsTable进行区域统计
mean_table = arcpy.sa.ZonalStatisticsAsTable(
in_zone_feature, "YJDL_EJDL", in_value_raster, out_table_mean, "DATA", "MEAN"
)
# 2.1 添加土壤类型字段并计算
arcpy.management.AddFields(
out_table_mean,
[["YJDL", "TEXT"],["EJDL", "TEXT"]],
)
arcpy.management.CalculateField(mean_table, "YJDL", "!YJDL_EJDL!.split('_')[0]", "PYTHON3")
arcpy.management.CalculateField(mean_table, "EJDL", "!YJDL_EJDL!.split('_')[1]", "PYTHON3")
# 生成表5.4的面积统计Excel报告
final_dataframe, soil_structure = process_data_for_table5_4(gdb_path, out_table_area, sample_table_name,target_areas_dict)
write_to_excel_table5_4(final_dataframe, soil_structure, output_excel_path)
# 生成表5.3的均值统计Excel报告
final_mean_dataframe = process_data_for_table5_3(gdb_path, out_table_mean, sample_table_name)
write_to_excel_table5_3(final_mean_dataframe, output_excel_path.replace(".xlsx", "_mean.xlsx"))
except Exception as e:
print(f"\n处理过程中发生严重错误: {e}")
import traceback
traceback.print_exc()
finally:
import gc
gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()