初始化

This commit is contained in:
2026-04-22 12:27:49 +08:00
commit 4857cb6e45
73 changed files with 20927 additions and 0 deletions

4
tools/core/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
"""
核心功能模块
包含独立的功能脚本,可以在独立进程中运行
"""

View File

View File

@@ -0,0 +1,610 @@
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from collections import OrderedDict
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.arcgis_field_cal_code import codeblock_cal_shfj, codeblock_dltb_ejdl, codeblock_dltb_yjdl
from tools.core.utils.excel_utils import ExcelStyleUtils
# Display order of the first-level land-use classes (YJDL) used when sorting report rows.
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
# Preferred display order of the second-level land-use classes (EJDL); the processing
# functions in this module append any EJDL value not listed here after these entries.
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# --- 2. 辅助函数 ---
# 等级计算
def get_acidification_degree(delta_ph):
    """Classify a ΔpH value into an acidification-degree label.

    Returns "-" when the value is missing or exactly 0; otherwise
    "重度酸化" for ΔpH > 1.0, "中度酸化" for (0.5, 1.0], "轻度酸化" for
    (0.3, 0.5], "未酸化" for [-0.3, 0.3] and "碱化" below -0.3.
    """
    if pd.isna(delta_ph) or delta_ph == 0:
        return "-"
    # Adjust these thresholds to match the grading standard actually in use.
    # Bands are tested from the highest downwards, so each test only needs
    # the lower bound of its band.
    if delta_ph > 1.0:
        return "重度酸化"
    if delta_ph > 0.5:
        return "中度酸化"
    if delta_ph > 0.3:
        return "轻度酸化"
    if delta_ph >= -0.3:
        return "未酸化"
    return "碱化"
# --- 3. 数据处理与分析 均值---
def process_data_for_table5_3(gdb_path, mean_table_name, sample_table_name):
    """Assemble the ΔpH summary per land-use class for the table 5.3 report.

    Sample points are read with the ``dPH>0.3`` filter and averaged per
    (YJDL, EJDL); the mapped MEAN and COUNT values are read from the mean
    table; both are joined onto a skeleton holding every (YJDL, EJDL) pair
    seen in either source, and each mean is labelled through
    ``get_acidification_degree``.

    Args:
        gdb_path: Geodatabase path that both table names are joined onto.
        mean_table_name: Table providing the YJDL, EJDL, MEAN and COUNT fields.
        sample_table_name: Table providing the YJDL, EJDL and dPH fields.

    Returns:
        pandas.DataFrame sorted by YJDL then EJDL (both ordered categoricals)
        carrying 样点均值, 制图均值, 制图样点数, 酸化程度_样本 and 酸化程度_制图.
    """
    print("【最终版 v2】开始处理数据...")
    keys = ['YJDL', 'EJDL']

    def clean_df(df, columns):
        # Normalise the key columns to stripped strings, turn the textual
        # null markers into NaN and drop rows that lack any key.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: mean dPH per (YJDL, EJDL) ---
    print("--> 步骤1: 计算样点均值...")
    sample_table_path = os.path.join(gdb_path, sample_table_name)
    df_samples = pd.DataFrame(
        arcpy.da.TableToNumPyArray(sample_table_path, keys + ['dPH'], 'dPH>0.3', skip_nulls=False)
    )
    df_samples = clean_df(df_samples, keys)
    df_sample_means = (
        df_samples.groupby(keys)['dPH'].mean().reset_index().rename(columns={'dPH': '样点均值'})
    )
    print("样点均值计算完成。")

    # --- b. Mapped data: zonal mean and its cell count ---
    print("--> 步骤2: 获取制图均值和样点数...")
    mean_table_path = os.path.join(gdb_path, mean_table_name)
    df_map_data = pd.DataFrame(
        arcpy.da.TableToNumPyArray(mean_table_path, keys + ['MEAN', 'COUNT'], skip_nulls=False)
    )
    df_map_data = clean_df(df_map_data, keys)
    df_map_data.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)
    print("制图数据获取完成。")

    # --- c. Merge both sources onto the union of their key pairs ---
    print("--> 步骤3: 合并数据...")
    df_skeleton = (
        pd.concat([df_sample_means[keys], df_map_data[keys]])
        .drop_duplicates()
        .reset_index(drop=True)
    )
    df_final = pd.merge(df_skeleton, df_sample_means, on=keys, how='left')
    df_final = pd.merge(df_final, df_map_data, on=keys, how='left')

    # --- d. Label both means; rows are only labelled here, never removed,
    #        so the writer can still build its totals from all of them ---
    print("--> 步骤4: 计算酸化程度...")
    df_final['酸化程度_样本'] = df_final['样点均值'].apply(get_acidification_degree)
    df_final['酸化程度_制图'] = df_final['制图均值'].apply(get_acidification_degree)

    # Sort by the configured display order. Classes missing from the
    # configured lists are appended instead of discarded: pd.Categorical turns
    # every value outside `categories` into NaN, and NaN keys would silently
    # vanish from the writer's group-by.
    in_yjdl_order = yjdl_order + [x for x in df_final['YJDL'].unique() if x not in yjdl_order]
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=in_yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final.sort_values(keys, inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 4. Excel 制表 均值---
def write_to_excel_table5_3(df, output_path):
    """Write the table 5.3 ΔpH summary to a formatted Excel workbook.

    Only rows whose sample or mapped degree is 轻度酸化 / 中度酸化 / 重度酸化
    are listed. Each first-level class (YJDL) is closed by a "合计" row,
    except when its last listed second-level name is 林地, 草地 or 其他; such
    a class is written as one row with columns A:B merged.

    Args:
        df: Frame with YJDL, EJDL, 样点均值, 制图均值, 制图样点数,
            酸化程度_样本 and 酸化程度_制图 columns.
        output_path: Destination ``.xlsx`` path.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    # Workbook() already contains one worksheet; reuse it instead of adding a
    # second one, otherwise the saved file carries a stray empty "Sheet" tab.
    ws = wb.active
    ws.title = "不同土地利用类型pH变化统计"

    # --- Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:F1')
    ws['C1'] = 'ΔpH'
    second_row = ['一级', '二级', '样点均值', '酸化程度', '制图均值', '酸化程度']
    for col_idx, text in enumerate(second_row, start=1):
        ws.cell(row=2, column=col_idx).value = text

    def write_pair(row_idx, first_col, mean_value, degree):
        # A mean and its degree are shown only when the mean is present and
        # above the 0.3 acidification threshold; otherwise both cells get "-".
        shown = pd.notna(mean_value) and mean_value > 0.3
        ws.cell(row=row_idx, column=first_col).value = f"{mean_value:.2f}" if shown else "-"
        ws.cell(row=row_idx, column=first_col + 1).value = degree if shown else "-"

    # --- Data rows ---
    current_row = 3
    acid_levels_to_show = ["轻度酸化", "中度酸化", "重度酸化"]
    single_row_classes = ("林地", "草地", "其他")
    df_to_write = df[
        df['酸化程度_样本'].isin(acid_levels_to_show)
        | df['酸化程度_制图'].isin(acid_levels_to_show)
    ].copy()

    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        last_ejdl = None
        for _, row_data in group_yl_df.iterrows():
            last_ejdl = row_data['EJDL']
            ws.cell(row=current_row, column=2).value = last_ejdl
            write_pair(current_row, 3, row_data.get('样点均值'), row_data.get('酸化程度_样本', '-'))
            write_pair(current_row, 5, row_data.get('制图均值'), row_data.get('酸化程度_制图', '-'))
            current_row += 1

        # Decide from the group's own data rather than by reading the previous
        # worksheet cell back: for a group without rows that cell belongs to
        # an earlier group (or the header) and may already be merged.
        if last_ejdl in single_row_classes:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            continue

        # --- "合计" row ---
        ws.cell(row=current_row, column=2).value = '合计'
        # Sample side: plain mean of the listed per-class means.
        total_sample_mean = group_yl_df['样点均值'].mean()
        if pd.notna(total_sample_mean):
            ws.cell(row=current_row, column=3).value = f"{total_sample_mean:.2f}"
            ws.cell(row=current_row, column=4).value = get_acidification_degree(total_sample_mean)
        else:
            ws.cell(row=current_row, column=3).value = "-"
            ws.cell(row=current_row, column=4).value = "-"

        # Mapped side: mean weighted by 制图样点数, using only rows where both
        # the mean and a positive count are available.
        weighted_sum = 0
        total_count = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                weighted_sum += mean_val * count_val
                total_count += count_val
        weighted_avg = (weighted_sum / total_count) if total_count > 0 else 0
        if weighted_avg > 0:
            ws.cell(row=current_row, column=5).value = f"{weighted_avg:.2f}"
            ws.cell(row=current_row, column=6).value = get_acidification_degree(weighted_avg)
        else:
            ws.cell(row=current_row, column=5).value = "-"
            ws.cell(row=current_row, column=6).value = "-"

        # Merge the first-level label over the class rows plus the total row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    # current_row starts at 3, so the range below always covers the header.
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# --- 2. 数据处理与分析 (使用 Pandas) ---
def process_data_for_table5_4(gdb_path, area_table_name, sample_table_name, target_area_dict):
    """Build per-(YJDL, EJDL) sample and area statistics by acidification grade.

    A skeleton of every (YJDL, EJDL) pair found in either table is built
    first; sample counts/percentages per grade and adjusted areas/percentages
    per grade are then computed independently and left-merged onto it.

    Args:
        gdb_path: Geodatabase path that both table names are joined onto.
        area_table_name: Table providing YJDL, EJDL, SHFJ and AREA.
        sample_table_name: Table providing YJDL, EJDL and dPH.
        target_area_dict: Target total area per land-use code ('01', '02',
            '03', '04', '12'), compared against AREA * 0.0015. A class with
            no entry keeps its mapped total. ``None`` is treated as empty.

    Returns:
        ``(df_final, dynamic_soil_mapping)``: the merged statistics frame
        (missing values filled with 0) and an OrderedDict mapping each YJDL to
        its EJDL list in display order. ``(empty DataFrame, {})`` when no
        valid (YJDL, EJDL) pair exists.
    """
    print("【最终修正版 v2】开始处理数据...")
    keys = ['YJDL', 'EJDL']

    def clean_df(df, columns):
        # Normalise the key columns to stripped strings, turn the textual
        # null markers into NaN and drop rows that lack any key.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    def flatten(columns):
        # Collapse the pivot's two-level column labels into "<value>_<grade>".
        return [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in columns]

    # --- a. Skeleton of unique (YJDL, EJDL) pairs from both tables ---
    print("--> 步骤1: 建立统一的层级结构...")
    sample_table_path = os.path.join(gdb_path, sample_table_name)
    area_table_path = os.path.join(gdb_path, area_table_name)
    df_samples_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['YJDL', 'EJDL'], skip_nulls=False))
    df_area_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['YJDL', 'EJDL'], skip_nulls=False))
    df_samples_raw = clean_df(df_samples_raw, keys)
    df_area_raw = clean_df(df_area_raw, keys)
    df_skeleton = pd.concat([df_samples_raw, df_area_raw]).drop_duplicates().reset_index(drop=True)
    if df_skeleton.empty:
        print("警告: 无法从源数据中建立任何有效的 (YJDL, EJDL) 层级结构。")
        return pd.DataFrame(), {}
    print(f"已建立包含 {len(df_skeleton)} 个唯一土壤类型的层级结构。")

    # --- b. Sample statistics ---
    print("--> 步骤2: 独立统计样点数据...")
    df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['EJDL', 'YJDL', 'dPH'], skip_nulls=False))
    df_samples = clean_df(df_samples, keys)
    if not df_samples.empty:
        bins = [-np.inf, -0.3, 0.3, 0.5, 1.0, np.inf]
        labels = ["碱化", "未酸化", "轻度酸化", "中度酸化", "重度酸化"]
        df_samples['SHFJ'] = pd.cut(df_samples['dPH'], bins=bins, labels=labels, right=True)
        sample_counts = df_samples.groupby(['YJDL', 'EJDL', 'SHFJ'], observed=False).size().reset_index(name='样点数')
        ts_total_samples = sample_counts.groupby(keys)['样点数'].transform('sum')
        sample_counts['样点占比'] = (sample_counts['样点数'] / ts_total_samples) * 100
        df_sample_stats = sample_counts.pivot_table(
            index=keys, columns='SHFJ', values=['样点数', '样点占比'], fill_value=0, observed=False
        ).reset_index()
        df_sample_stats.columns = flatten(df_sample_stats.columns)
        df_final = pd.merge(df_skeleton, df_sample_stats, on=keys, how='left')
    else:
        df_final = df_skeleton.copy()

    # --- c. Area statistics ---
    print("--> 步骤3: 独立统计面积数据...")
    df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['EJDL', 'YJDL', 'SHFJ', 'AREA'], skip_nulls=False))
    df_area = clean_df(df_area, keys)
    if not df_area.empty:
        landuse_types = {'耕地': '01', '园地': '02', '林地': '03', '草地': '04', '其他': '12'}
        targets = target_area_dict or {}
        df_area['AREA_MU'] = df_area['AREA'] * 0.0015

        # Adjustment factor per first-level class: target total / mapped total.
        adjustment_factors = {}
        for yjdl, original_total in df_area.groupby('YJDL')['AREA_MU'].sum().items():
            # A class absent from landuse_types has no code and therefore no
            # target; it keeps its mapped total instead of raising KeyError.
            target_total = targets.get(landuse_types.get(yjdl), original_total)
            # A zero mapped total cannot be scaled; leave it untouched rather
            # than producing an inf/NaN factor.
            adjustment_factors[yjdl] = (target_total / original_total) if original_total else 1.0

        df_area['制图面积_亩'] = df_area['AREA_MU'] * df_area['YJDL'].map(adjustment_factors)
        ts_total_area = df_area.groupby(keys)['制图面积_亩'].transform('sum')
        df_area['面积占比'] = (df_area['制图面积_亩'] / ts_total_area) * 100
        # Areas and their shares are additive, so duplicates of one
        # (YJDL, EJDL, SHFJ) combination must be summed; pivot_table's default
        # aggregation is the mean. With unique combinations both agree.
        df_area_stats = df_area.pivot_table(
            index=keys, columns='SHFJ', values=['制图面积_亩', '面积占比'], aggfunc='sum', fill_value=0
        ).reset_index()
        df_area_stats.columns = flatten(df_area_stats.columns)
        # Merge onto df_final, which already carries the sample statistics.
        df_final = pd.merge(df_final, df_area_stats, on=keys, how='left')

    # --- d. Final clean-up and display-order mapping ---
    df_final.fillna(0, inplace=True)
    print("--> 步骤4: 自动构建层级结构...")
    # Values missing from the configured order lists are appended so that
    # .index() below cannot raise ValueError for an unexpected class.
    in_yjdl_order = yjdl_order + [x for x in df_final['YJDL'].unique() if x not in yjdl_order]
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    dynamic_soil_mapping = df_final.groupby('YJDL')['EJDL'].unique().apply(list).to_dict()
    dynamic_soil_mapping = OrderedDict(
        sorted(dynamic_soil_mapping.items(), key=lambda item: in_yjdl_order.index(item[0]))
    )
    for yl in dynamic_soil_mapping:
        dynamic_soil_mapping[yl] = sorted(dynamic_soil_mapping[yl], key=in_ejdl_order.index)
    print("数据处理流程完成!")
    return df_final, dynamic_soil_mapping
# --- 3. Excel 制表 面积---
def write_to_excel_table5_4(df, soil_mapping, output_path):
    """Write the acidification-grade statistics (table 5.4) to Excel.

    For each first-level class in ``soil_mapping`` one row per second-level
    class is written with, for 轻度酸化 / 中度酸化 / 重度酸化, the sample
    count, sample share, mapped area and area share. A "合计" row follows,
    except when the last second-level name is 林地, 草地 or 其他; such a class
    is written as one row with columns A:B merged.

    Args:
        df: Statistics frame with YJDL, EJDL and the per-grade columns
            样点数_*, 样点占比_*, 制图面积_亩_*, 面积占比_*.
        soil_mapping: Mapping of YJDL to its ordered list of EJDL names.
        output_path: Destination ``.xlsx`` path.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    # Workbook() already contains one worksheet; reuse it instead of adding a
    # second one, otherwise the saved file carries a stray empty "Sheet" tab.
    ws = wb.active
    ws.title = "不同类型土壤酸化程度统计"

    # --- Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws['A2'] = '一级'
    ws['B2'] = '二级'
    acid_levels = ['轻度酸化', '中度酸化', '重度酸化']
    all_possible_levels = ['碱化', '未酸化', '轻度酸化', '中度酸化', '重度酸化']
    acid_level_headers = ['轻度酸化(0.3<ΔpH≤0.5)', '中度酸化(0.5<ΔpH≤1.0)', '重度酸化(ΔpH>1.0)']
    sub_headers = ['样点数/个', '占比/%', '制图面积/亩', '占比/%']
    col_start = 3
    for header in acid_level_headers:
        ws.merge_cells(start_row=1, start_column=col_start, end_row=1, end_column=col_start + 3)
        ws.cell(row=1, column=col_start).value = header
        for offset, text in enumerate(sub_headers):
            ws.cell(row=2, column=col_start + offset).value = text
        col_start += 4

    def column_total(frame, name):
        # Sum of a column, or 0 when the frame has no such column (a grade
        # that never occurred has no pivoted column at all).
        return frame[name].sum() if name in frame else 0

    def write_block(row_idx, first_col, count, count_pct, area, area_pct):
        # One grade = four cells; a non-positive count/area is shown as "-",
        # and each share follows the visibility of its own count/area.
        ws.cell(row=row_idx, column=first_col).value = int(count) if count > 0 else "-"
        ws.cell(row=row_idx, column=first_col + 1).value = f"{count_pct:.2f}%" if count > 0 else "-"
        ws.cell(row=row_idx, column=first_col + 2).value = f"{area:.0f}" if area > 0 else "-"
        ws.cell(row=row_idx, column=first_col + 3).value = f"{area_pct:.2f}%" if area > 0 else "-"

    # --- Data rows ---
    current_row = 3
    single_row_classes = ("林地", "草地", "其他")
    for yl, ts_list in soil_mapping.items():
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        group_yl_df = df[df['YJDL'] == yl]

        # 1. One row per second-level class.
        for ts in ts_list:
            ws.cell(row=current_row, column=2).value = ts
            row_data = group_yl_df[group_yl_df['EJDL'] == ts]
            col_start = 3
            if not row_data.empty:
                data_series = row_data.iloc[0]
                for level in acid_levels:
                    # .get() falls back to 0 when a grade column is absent.
                    write_block(
                        current_row,
                        col_start,
                        data_series.get(f'样点数_{level}', 0),
                        data_series.get(f'样点占比_{level}', 0),
                        data_series.get(f'制图面积_亩_{level}', 0),
                        data_series.get(f'面积占比_{level}', 0),
                    )
                    col_start += 4
            else:
                # No statistics at all for this class: fill all 12 cells.
                for _ in range(len(acid_levels) * 4):
                    ws.cell(row=current_row, column=col_start).value = "-"
                    col_start += 1
            current_row += 1

        # 2. Classes ending in one of the single-row names get no total row.
        #    Decided from ts_list itself instead of reading the sheet back.
        if len(ts_list) > 0 and ts_list[-1] in single_row_classes:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            continue

        ws.cell(row=current_row, column=2).value = '合计'
        # Denominators cover every grade, not only the three displayed ones.
        yl_grand_total_samples = sum(column_total(group_yl_df, f'样点数_{lvl}') for lvl in all_possible_levels)
        yl_grand_total_area = sum(column_total(group_yl_df, f'制图面积_亩_{lvl}') for lvl in all_possible_levels)
        col_start = 3
        for level in acid_levels:
            # The previous `group_yl_df.get(name, 0).sum()` raised
            # AttributeError when the column was missing (int has no .sum()).
            sample_sum = column_total(group_yl_df, f'样点数_{level}')
            area_sum = column_total(group_yl_df, f'制图面积_亩_{level}')
            sample_perc = (sample_sum / yl_grand_total_samples * 100) if yl_grand_total_samples > 0 else 0
            area_perc = (area_sum / yl_grand_total_area * 100) if yl_grand_total_area > 0 else 0
            write_block(current_row, col_start, sample_sum, sample_perc, area_sum, area_perc)
            col_start += 4

        # 3. Merge the first-level label over the class rows plus the total.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    # current_row starts at 3, so the range below always covers the header.
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path: str, ph_features: str, dltb_class_feature: str, shph_tif: str, output_path: str, target_areas_dict: dict):
    """Produce the land-use acidification Excel reports (tables 5.4 and 5.3).

    Creates the area table (TabulateIntersection) and the mean table
    (ZonalStatisticsAsTable) in the workspace when they do not exist yet, then
    writes "土地利用类型酸化统计表.xlsx" and its "_mean" companion into
    ``output_path``. Any exception is printed with its traceback and swallowed.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        ph_features: Reclassified acidification feature layer (class features).
        dltb_class_feature: Land-use parcel features (zone features).
        shph_tif: Acidification pH raster used as the value raster.
        output_path: Directory receiving the Excel files.
        target_areas_dict: Target areas passed to ``process_data_for_table5_4``.
    """
    try:
        # --- 1. Configuration ---
        # The workbook is saved into this directory, so make sure it exists.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        output_excel_path = os.path.join(output_path, "土地利用类型酸化统计表.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        sample_table_name = "历史样点PH信息_Table"  # sample-point table
        in_zone_feature = dltb_class_feature
        in_class_feature = ph_features
        in_value_raster = shph_tif
        out_table_area = r"土地利用类型_酸化面积表"  # area statistics table
        out_table_mean = r"土地利用类型_酸化均值表"  # mean statistics table
        print("开始处理数据...")

        if not arcpy.Exists(out_table_area):
            # Derive the classification fields when the inputs lack them.
            try:
                if not arcpy.ListFields(in_zone_feature, "EJDL"):
                    arcpy.management.CalculateField(in_zone_feature, "EJDL", "calculate_ejdl(!DLBM!,!DLMC!)", "PYTHON3", codeblock_dltb_ejdl)
                    arcpy.management.CalculateField(in_zone_feature, "YJDL", "calculate_yjdl(!DLBM!)", "PYTHON3", codeblock_dltb_yjdl)
                if not arcpy.ListFields(in_class_feature, "SHFJ"):
                    arcpy.management.CalculateField(in_class_feature, "SHFJ", "calculate_shfj(!gridcode!)", "PYTHON3", codeblock_cal_shfj)
            except Exception as e:
                print(f"计算SHFJ字段时发生错误: {e}")
            # Tabulate in the parcels' own coordinate system so the areas are
            # measured in that projection.
            desc = arcpy.Describe(in_zone_feature)
            spatial_ref = desc.spatialReference
            with arcpy.EnvManager(outputCoordinateSystem=spatial_ref):
                arcpy.analysis.TabulateIntersection(
                    in_zone_feature,
                    ["YJDL", "EJDL"],
                    in_class_feature,
                    out_table_area,
                    "SHFJ",
                    out_units="SQUARE_METERS",
                )

        if not arcpy.Exists(out_table_mean):
            # Zonal statistics take a single zone field, so build a combined key.
            if not arcpy.ListFields(in_zone_feature, "YJDL_EJDL"):
                arcpy.management.AddField(in_zone_feature, "YJDL_EJDL", "TEXT")
                arcpy.management.CalculateField(in_zone_feature, "YJDL_EJDL", "!YJDL! + '_' + !EJDL!", "PYTHON3")
            mean_table = arcpy.sa.ZonalStatisticsAsTable(
                in_zone_feature, "YJDL_EJDL", in_value_raster, out_table_mean, "DATA", "MEAN"
            )
            # Split the combined key back into its two parts.
            arcpy.management.AddFields(
                out_table_mean,
                [["YJDL", "TEXT"], ["EJDL", "TEXT"]],
            )
            # maxsplit=1 keeps an EJDL name that itself contains "_" intact.
            arcpy.management.CalculateField(mean_table, "YJDL", "!YJDL_EJDL!.split('_', 1)[0]", "PYTHON3")
            arcpy.management.CalculateField(mean_table, "EJDL", "!YJDL_EJDL!.split('_', 1)[1]", "PYTHON3")

        # Table 5.4: area statistics report.
        final_dataframe, soil_structure = process_data_for_table5_4(gdb_path, out_table_area, sample_table_name, target_areas_dict)
        write_to_excel_table5_4(final_dataframe, soil_structure, output_excel_path)
        # Table 5.3: mean statistics report.
        final_mean_dataframe = process_data_for_table5_3(gdb_path, out_table_mean, sample_table_name)
        write_to_excel_table5_3(final_mean_dataframe, output_excel_path.replace(".xlsx", "_mean.xlsx"))
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,629 @@
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.arcgis_field_cal_code import codeblock_cal_shfj
from tools.core.utils.excel_utils import ExcelStyleUtils
from tools.config.custom_sort import yl_order, ts_order
# --- 2. 辅助函数 ---
# 获取要素类各酸化等级面积
def get_acid_area_by_group(target_area_df):
    """Sum the AREA_MU column per acidification grade (SHFJ).

    Args:
        target_area_df: DataFrame with SHFJ and AREA_MU columns. It is only
            read, never modified.

    Returns:
        dict mapping each SHFJ value to its summed AREA_MU, or None when the
        aggregation fails (the error message is printed).
    """
    try:
        # groupby/sum builds a new Series, so no defensive copy is needed.
        return target_area_df.groupby("SHFJ")["AREA_MU"].sum().to_dict()
    except Exception as e:
        print(f"计算面积时出错: {str(e)}")
        return None
def apply_adjustment_by_each_level(df, target_area_dict):
    """Adjust the mapped area of each acidification grade to its target total.

    For every grade in ``target_area_dict`` the difference between the target
    and the column total of ``制图面积_亩_<grade>`` is distributed over the
    rows in proportion to their area, and the result (clipped at 0) is stored
    in ``平差后面积_亩_<grade>``. A grade whose source column is missing gets a
    0-filled result column; a grade whose total is not positive is copied
    unchanged.

    Args:
        df (pd.DataFrame): Frame holding the per-grade area columns.
        target_area_dict (dict): Target total area per grade, e.g.
            {'轻度酸化': 10000.0, '中度酸化': 8000.0, ...}.

    Returns:
        pd.DataFrame: A copy of ``df`` with the adjusted columns added.
    """
    print("\n开始按每个酸化等级独立进行平差...")
    result = df.copy()
    for level, target_area in target_area_dict.items():
        source_col = f'制图面积_亩_{level}'
        output_col = f'平差后面积_亩_{level}'

        if source_col not in df.columns:
            print(f"警告: 未找到列 '{source_col}',跳过该等级平差。")
            if output_col not in result.columns:
                result[output_col] = 0  # placeholder column
            continue

        mapped = result[source_col]
        actual_area = mapped.sum()
        if not actual_area > 0:
            # Nothing to scale: carry the mapped values over as they are.
            result[output_col] = mapped
            continue

        error = target_area - actual_area
        print(f"等级 '{level}': 目标面积={target_area:.2f}, 实际面积={actual_area:.2f}, 误差={error:.2f}")
        # Each row receives the share of the error matching its share of area.
        share_of_error = error * (mapped / actual_area)
        result[output_col] = (mapped + share_of_error).clip(lower=0)

    print("按每个酸化等级独立平差完成。")
    return result
# 获取酸化程度
def get_acidification_degree(delta_ph):
    """Map a ΔpH value to its acidification-degree label.

    Missing values and an exact 0 yield "-". Otherwise the label is
    "重度酸化" (> 1.0), "中度酸化" (0.5, 1.0], "轻度酸化" (0.3, 0.5],
    "未酸化" [-0.3, 0.3] or "碱化" (< -0.3).
    """
    if pd.isna(delta_ph) or delta_ph == 0:
        return "-"
    # Adjust these thresholds to match the grading standard actually in use.
    # Ordered from the strictest lower bound down; the first one exceeded wins.
    acid_bands = ((1.0, "重度酸化"), (0.5, "中度酸化"), (0.3, "轻度酸化"))
    for lower_bound, label in acid_bands:
        if delta_ph > lower_bound:
            return label
    return "未酸化" if delta_ph >= -0.3 else "碱化"
# --- 3. 数据处理与分析 均值表---
def process_data_for_table5_5(gdb_path, mean_table_name, sample_table_name):
    """Assemble the ΔpH summary per soil type (YL / TS) for table 5.5.

    Sample points are read with the ``dPH>0.3`` filter and averaged per
    (YL, TS); MEAN and COUNT are read from the mean table; both are joined
    onto the union of their (YL, TS) pairs and each mean is labelled through
    ``get_acidification_degree``.

    Args:
        gdb_path: Geodatabase path that both table names are joined onto.
        mean_table_name: Table providing the YL, TS, MEAN and COUNT fields.
        sample_table_name: Table providing the YL, TS and dPH fields.

    Returns:
        pandas.DataFrame sorted by YL then TS (both ordered categoricals)
        carrying 样点均值, 制图均值, 制图样点数, 酸化程度_样本 and 酸化程度_制图.
    """
    print("【最终版 v2】开始处理数据...")
    group_keys = ['YL', 'TS']

    def clean_df(df, columns):
        # Key columns become stripped strings; textual null markers become NaN
        # and rows missing any key are dropped.
        for name in columns:
            df[name] = df[name].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: mean dPH per (YL, TS) ---
    print("--> 步骤1: 计算样点均值...")
    sample_array = arcpy.da.TableToNumPyArray(
        os.path.join(gdb_path, sample_table_name),
        ['YL', 'TS', 'dPH'],
        'dPH>0.3',
        skip_nulls=False,
    )
    sample_frame = clean_df(pd.DataFrame(sample_array), group_keys)
    df_sample_means = sample_frame.groupby(group_keys)['dPH'].mean().reset_index()
    df_sample_means = df_sample_means.rename(columns={'dPH': '样点均值'})
    print("样点均值计算完成。")

    # --- b. Mapped data: zonal mean and its count ---
    print("--> 步骤2: 获取制图均值和样点数...")
    map_array = arcpy.da.TableToNumPyArray(
        os.path.join(gdb_path, mean_table_name),
        ['YL', 'TS', 'MEAN', 'COUNT'],
        skip_nulls=False,
    )
    df_map_data = clean_df(pd.DataFrame(map_array), group_keys)
    df_map_data = df_map_data.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'})
    print("制图数据获取完成。")

    # --- c. Merge both sources onto the union of their key pairs ---
    print("--> 步骤3: 合并数据...")
    all_pairs = pd.concat([df_sample_means[group_keys], df_map_data[group_keys]])
    df_skeleton = all_pairs.drop_duplicates().reset_index(drop=True)
    df_final = df_skeleton.merge(df_sample_means, on=group_keys, how='left')
    df_final = df_final.merge(df_map_data, on=group_keys, how='left')

    # --- d. Label both means; rows are only labelled here, none are removed,
    #        so totals can still be computed from all of them ---
    print("--> 步骤4: 计算酸化程度...")
    for source_col, label_col in (('样点均值', '酸化程度_样本'), ('制图均值', '酸化程度_制图')):
        df_final[label_col] = df_final[source_col].apply(get_acidification_degree)

    # Sort by the configured order, appending values the configuration lacks.
    extra_yl = [x for x in df_final['YL'].unique() if x not in yl_order]
    extra_ts = [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=yl_order + extra_yl, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=ts_order + extra_ts, ordered=True)
    df_final.sort_values(group_keys, inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 4. Excel 制表 均值表---
def write_to_excel_table5_5(df, output_path):
    """Write the table 5.5 ΔpH summary per soil type to a formatted workbook.

    Only rows whose sample or mapped degree is 轻度酸化 / 中度酸化 / 重度酸化
    are listed. Every YL group present in the filtered data is followed by a
    "合计" row and its label is merged down column A.

    Args:
        df: Frame with YL, TS, 样点均值, 制图均值, 制图样点数, 酸化程度_样本
            and 酸化程度_制图 columns.
        output_path: Destination ``.xlsx`` path.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    # Workbook() already contains one worksheet; reuse it instead of adding a
    # second one, otherwise the saved file carries a stray empty "Sheet" tab.
    ws = wb.active
    ws.title = "不同类型土壤pH变化统计"

    # --- Header ---
    ws.merge_cells('A1:A2')
    ws['A1'] = '亚类'
    ws.merge_cells('B1:B2')
    ws['B1'] = '土属'
    ws.merge_cells('C1:F1')
    ws['C1'] = 'ΔpH'
    for col_idx, text in enumerate(['样点均值', '酸化程度', '制图均值', '酸化程度'], start=3):
        ws.cell(row=2, column=col_idx).value = text

    def write_pair(row_idx, first_col, mean_value, degree):
        # A mean and its degree are shown only when the mean is present and
        # above the 0.3 acidification threshold; otherwise both cells get "-".
        shown = pd.notna(mean_value) and mean_value > 0.3
        ws.cell(row=row_idx, column=first_col).value = f"{mean_value:.2f}" if shown else "-"
        ws.cell(row=row_idx, column=first_col + 1).value = degree if shown else "-"

    # --- Data rows ---
    current_row = 3
    acid_levels_to_show = ["轻度酸化", "中度酸化", "重度酸化"]
    df_to_write = df[
        df['酸化程度_样本'].isin(acid_levels_to_show)
        | df['酸化程度_制图'].isin(acid_levels_to_show)
    ].copy()

    for yl, group_yl_df in df_to_write.groupby('YL', observed=True, sort=False):
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['TS']
            write_pair(current_row, 3, row_data.get('样点均值'), row_data.get('酸化程度_样本', '-'))
            write_pair(current_row, 5, row_data.get('制图均值'), row_data.get('酸化程度_制图', '-'))
            current_row += 1

        # --- "合计" row ---
        ws.cell(row=current_row, column=2).value = '合计'
        # Sample side: plain mean of the listed per-type means.
        total_sample_mean = group_yl_df['样点均值'].mean()
        if pd.notna(total_sample_mean):
            ws.cell(row=current_row, column=3).value = f"{total_sample_mean:.2f}"
            ws.cell(row=current_row, column=4).value = get_acidification_degree(total_sample_mean)
        else:
            ws.cell(row=current_row, column=3).value = "-"
            ws.cell(row=current_row, column=4).value = "-"

        # Mapped side: mean weighted by 制图样点数, using only rows where both
        # the mean and a positive count are available.
        weighted_sum = 0
        total_count = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                weighted_sum += mean_val * count_val
                total_count += count_val
        weighted_avg = (weighted_sum / total_count) if total_count > 0 else 0
        if weighted_avg > 0:
            ws.cell(row=current_row, column=5).value = f"{weighted_avg:.2f}"
            ws.cell(row=current_row, column=6).value = get_acidification_degree(weighted_avg)
        else:
            ws.cell(row=current_row, column=5).value = "-"
            ws.cell(row=current_row, column=6).value = "-"

        # Merge the YL label over the group's rows plus the total row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    # current_row starts at 3, so the range below always covers the header.
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row-1}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# --- 2. 数据处理与分析 (面积统计表) ---
def process_data_final(gdb_path, area_table_name, sample_table_name):
    """Build per-(YL, TS) sample and area statistics by acidification grade.

    A skeleton of every (YL, TS) pair found in either table is built first;
    sample counts/percentages per grade and mapped areas/percentages per grade
    are then computed independently and left-merged onto it.

    Args:
        gdb_path: Geodatabase path that both table names are joined onto.
        area_table_name: Table providing YL, TS, SHFJ and AREA.
        sample_table_name: Table providing YL, TS and dPH.

    Returns:
        ``(df_final, dynamic_soil_mapping)``: the merged statistics frame
        (missing values filled with 0, sorted by YL then TS) and a dict
        mapping each YL to its list of TS values in that order.
        ``(empty DataFrame, {})`` when no valid (YL, TS) pair exists.
    """
    print("【最终修正版 v2】开始处理数据...")
    keys = ['YL', 'TS']

    def clean_df(df, columns):
        # Normalise the key columns to stripped strings, turn the textual
        # null markers into NaN and drop rows that lack any key.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    def flatten(columns):
        # Collapse the pivot's two-level column labels into "<value>_<grade>".
        return [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in columns]

    # --- a. Skeleton of unique (YL, TS) pairs from both tables ---
    print("--> 步骤1: 建立统一的层级结构...")
    sample_table_path = os.path.join(gdb_path, sample_table_name)
    area_table_path = os.path.join(gdb_path, area_table_name)
    df_samples_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['YL', 'TS'], skip_nulls=False))
    df_area_raw = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['YL', 'TS'], skip_nulls=False))
    df_samples_raw = clean_df(df_samples_raw, keys)
    df_area_raw = clean_df(df_area_raw, keys)
    df_skeleton = pd.concat([df_samples_raw, df_area_raw]).drop_duplicates().reset_index(drop=True)
    if df_skeleton.empty:
        print("警告: 无法从源数据中建立任何有效的 (YL, TS) 层级结构。")
        return pd.DataFrame(), {}
    print(f"已建立包含 {len(df_skeleton)} 个唯一土壤类型的层级结构。")

    # --- b. Sample statistics ---
    print("--> 步骤2: 独立统计样点数据...")
    df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['TS', 'YL', 'dPH'], skip_nulls=False))
    df_samples = clean_df(df_samples, keys)
    if not df_samples.empty:
        bins = [-np.inf, -0.3, 0.3, 0.5, 1.0, np.inf]
        labels = ["碱化", "未酸化", "轻度酸化", "中度酸化", "重度酸化"]
        df_samples['SHFJ'] = pd.cut(df_samples['dPH'], bins=bins, labels=labels, right=True)
        sample_counts = df_samples.groupby(['YL', 'TS', 'SHFJ'], observed=False).size().reset_index(name='样点数')
        ts_total_samples = sample_counts.groupby(keys)['样点数'].transform('sum')
        sample_counts['样点占比'] = (sample_counts['样点数'] / ts_total_samples) * 100
        df_sample_stats = sample_counts.pivot_table(
            index=keys, columns='SHFJ', values=['样点数', '样点占比'], fill_value=0, observed=False
        ).reset_index()
        df_sample_stats.columns = flatten(df_sample_stats.columns)
        df_final = pd.merge(df_skeleton, df_sample_stats, on=keys, how='left')
    else:
        df_final = df_skeleton.copy()

    # --- c. Area statistics ---
    print("--> 步骤3: 独立统计面积数据...")
    df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['TS', 'YL', 'SHFJ', 'AREA'], skip_nulls=False))
    df_area = clean_df(df_area, keys)
    if not df_area.empty:
        df_area['制图面积_亩'] = df_area['AREA'] * 0.0015
        ts_total_area = df_area.groupby(keys)['制图面积_亩'].transform('sum')
        df_area['面积占比'] = (df_area['制图面积_亩'] / ts_total_area) * 100
        # Areas and their shares are additive, so duplicates of one
        # (YL, TS, SHFJ) combination must be summed; pivot_table's default
        # aggregation is the mean. With unique combinations both agree.
        df_area_stats = df_area.pivot_table(
            index=keys, columns='SHFJ', values=['制图面积_亩', '面积占比'], aggfunc='sum', fill_value=0
        ).reset_index()
        df_area_stats.columns = flatten(df_area_stats.columns)
        # Merge onto df_final, which already carries the sample statistics.
        df_final = pd.merge(df_final, df_area_stats, on=keys, how='left')

    # --- d. Final clean-up, ordering and YL -> TS mapping ---
    df_final.fillna(0, inplace=True)
    print("--> 步骤4: 自动构建层级结构...")
    # Values missing from the configured order lists are appended after them.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final.sort_values(keys, inplace=True)
    dynamic_soil_mapping = df_final.groupby('YL', observed=True)['TS'].unique().apply(list).to_dict()
    print("数据处理流程完成!")
    return df_final, dynamic_soil_mapping
# --- 3. Excel 制表 面积统计表 ---
def write_to_excel(df, soil_mapping, output_path):
    """
    Write the per-soil-type acidification statistics to a formatted Excel file.

    Layout: columns A/B hold the soil sub-class (YL) and soil genus (TS); each
    of the three acidification levels then takes four columns (sample count,
    sample share, mapped area, area share). Every sub-class ends with a
    "合计" (total) row and its column-A cells are merged.

    Args:
        df: DataFrame with ``YL``/``TS`` columns plus per-level statistic
            columns named ``样点数_<level>``, ``样点占比_<level>``,
            ``面积占比_<level>`` and either ``平差后面积_亩_<level>`` or
            ``制图面积_亩_<level>``.
        soil_mapping: mapping ``{YL: [TS, ...]}`` giving the row order.
        output_path: destination ``.xlsx`` path.

    Returns:
        None. The workbook is saved to ``output_path``.
    """
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同类型土壤酸化程度统计"

    if df.empty:
        # The warning announces an empty report, so actually write one
        # (same behaviour as the other report writers in this project).
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")

    def column_sum(frame, column):
        """Sum ``column`` of ``frame``; a missing column counts as 0."""
        return frame[column].sum() if column in frame else 0

    # --- Header ---
    ws.merge_cells('A1:A2')
    ws['A1'] = '亚类'
    ws.merge_cells('B1:B2')
    ws['B1'] = '土属'
    acid_levels = ['轻度酸化', '中度酸化', '重度酸化']
    all_possible_levels = ['碱化', '未酸化', '轻度酸化', '中度酸化', '重度酸化']
    acid_level_headers = ['轻度酸化(0.3<ΔpH≤0.5)', '中度酸化(0.5<ΔpH≤1.0)', '重度酸化(ΔpH>1.0)']
    col_start = 3
    for header in acid_level_headers:
        ws.merge_cells(start_row=1, start_column=col_start, end_row=1, end_column=col_start + 3)
        ws.cell(row=1, column=col_start).value = header
        ws.cell(row=2, column=col_start).value = '样点数/个'
        ws.cell(row=2, column=col_start + 1).value = '占比/%'
        ws.cell(row=2, column=col_start + 2).value = '制图面积/亩'
        ws.cell(row=2, column=col_start + 3).value = '占比/%'
        col_start += 4

    # --- Data rows ---
    current_row = 3
    for yl, ts_list in soil_mapping.items():
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        # Rows that belong to the current sub-class only.
        group_yl_df = df[df['YL'] == yl]

        # 1. One row per soil genus of this sub-class.
        for ts in ts_list:
            ws.cell(row=current_row, column=2).value = ts
            row_data = group_yl_df[group_yl_df['TS'] == ts]
            col_start = 3
            if not row_data.empty:
                data_series = row_data.iloc[0]
                for level in acid_levels:
                    # Prefer the adjusted-area column; fall back to the raw
                    # mapped-area column so the unadjusted code path does not
                    # print "-" for every area cell.
                    area_col = f'平差后面积_亩_{level}'
                    if area_col not in data_series:
                        area_col = f'制图面积_亩_{level}'

                    sample_val = data_series.get(f'样点数_{level}', 0)
                    sample_pct_val = data_series.get(f'样点占比_{level}', 0)
                    area_val = data_series.get(area_col, 0)
                    area_pct_val = data_series.get(f'面积占比_{level}', 0)

                    # Non-positive values are rendered as "-".
                    ws.cell(row=current_row, column=col_start).value = int(sample_val) if sample_val > 0 else "-"
                    ws.cell(row=current_row, column=col_start + 1).value = f"{sample_pct_val:.2f}%" if sample_val > 0 else "-"
                    # NOTE(review): the value is written as text, so this
                    # number format has no visible effect; kept as-is.
                    ws.cell(row=current_row, column=col_start + 2).number_format = "0.00"
                    ws.cell(row=current_row, column=col_start + 2).value = f"{area_val:.0f}" if area_val > 0 else "-"
                    ws.cell(row=current_row, column=col_start + 3).value = f"{area_pct_val:.2f}%" if area_val > 0 else "-"
                    col_start += 4
            else:
                # No statistics at all for this genus: fill every cell with "-".
                for _ in range(len(acid_levels) * 4):
                    ws.cell(row=current_row, column=col_start).value = "-"
                    col_start += 1
            current_row += 1

        # 2. Total row of the sub-class. Shares are relative to ALL levels
        #    (including non-acidified ones), hence all_possible_levels.
        ws.cell(row=current_row, column=2).value = '合计'
        yl_grand_total_samples = sum(
            column_sum(group_yl_df, f'样点数_{lvl}') for lvl in all_possible_levels
        )
        yl_grand_total_area = sum(
            column_sum(group_yl_df, f'制图面积_亩_{lvl}') for lvl in all_possible_levels
        )
        col_start = 3
        for level in acid_levels:
            # column_sum() tolerates a missing column; the previous
            # `.get(col, 0).sum()` raised AttributeError in that case.
            sample_sum = column_sum(group_yl_df, f'样点数_{level}')
            # NOTE(review): totals are taken from the 制图面积 columns even
            # when adjusted columns exist - confirm this is intended.
            area_sum = column_sum(group_yl_df, f'制图面积_亩_{level}')
            sample_perc = (sample_sum / yl_grand_total_samples * 100) if yl_grand_total_samples > 0 else 0
            area_perc = (area_sum / yl_grand_total_area * 100) if yl_grand_total_area > 0 else 0
            ws.cell(row=current_row, column=col_start).value = int(sample_sum) if sample_sum > 0 else "-"
            ws.cell(row=current_row, column=col_start + 1).value = f"{sample_perc:.2f}%" if sample_sum > 0 else "-"
            ws.cell(row=current_row, column=col_start + 2).value = f"{area_sum:.0f}" if area_sum > 0 else "-"
            ws.cell(row=current_row, column=col_start + 3).value = f"{area_perc:.2f}%" if area_sum > 0 else "-"
            col_start += 4

        # 3. Merge the sub-class cells (genus rows + total row) in column A.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Styling and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col = 2 + len(acid_levels) * 4
    ExcelStyleUtils.set_style(ws, f'A1:{get_column_letter(max_col)}{current_row-1}')
    ExcelStyleUtils.set_style(ws, f'A1:{get_column_letter(max_col)}2', header_font)
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, trlx_polygon, sh_ph_polygon, ph_raster, output_path, target_areas_df):
    """
    Build the soil-type acidification reports (area table and mean table).

    Steps performed, in order:
      1. Create the area cross-tabulation table with TabulateIntersection
         if it does not exist yet in the workspace.
      2. Create the zonal-mean table with ZonalStatisticsAsTable if it does
         not exist yet, keyed by a combined ``YL_TS`` field.
      3. Load the statistics, optionally apply the per-level area adjustment,
         and write the Excel reports.

    Args:
        gdb_path: geodatabase used as the ArcPy workspace.
        trlx_polygon: soil-type polygon layer (zone features).
        sh_ph_polygon: not used by the current code (see commented-out line).
        ph_raster: raster passed to ZonalStatisticsAsTable as the value raster.
        output_path: directory that receives the Excel files.
        target_areas_df: passed unchanged to ``get_acid_area_by_group``.

    Returns:
        None. Any exception is caught, printed with its traceback, and
        swallowed.

    NOTE(review): the nesting of the geoprocessing calls under the two
    ``arcpy.Exists`` checks was reconstructed from the comments - confirm.
    """
    try:
        # --- 1. User configuration ---
        sample_table_name = "历史样点PH信息_Table" # sample-point information table name
        # Output configuration
        output_excel_path = os.path.join(output_path, "土壤类型酸化统计表.xlsx") # path of the generated Excel report
        # Set up the workspace and variables
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        in_zone_feature = trlx_polygon # soil-type layer
        # in_class_feature = sh_ph_polygon # already reclassified acidification pH layer
        in_class_feature = "最小面积统计单元"
        in_value_raster = ph_raster # acidification pH raster
        dltb_ph_statstable = "土地利用类型_酸化面积表" # gdb table name; not referenced again in this function
        out_table_area = r"土壤类型_酸化面积表" # name of the output intersection table
        out_table_mean = r"土壤类型_酸化均值表" # name of the output mean table
        print("开始处理数据...")
        if not arcpy.Exists(out_table_area):
            # Best effort: (re)compute the SHFJ class field on the class features.
            try:
                arcpy.management.CalculateField(in_class_feature, "SHFJ", "calculate_shfj(!gridcode!)", "PYTHON3", codeblock_cal_shfj)
            except Exception as e:
                print(f"计算SHFJ字段时发生错误: {e}")
            # 1. Cross-tabulate zone (TS, YL) against class (SHFJ) areas.
            arcpy.analysis.TabulateIntersection(
                in_zone_feature,
                ["TS", "YL"],
                in_class_feature,
                out_table_area,
                "SHFJ",
                out_units="SQUARE_METERS",
            )
        if not arcpy.Exists(out_table_mean):
            # Make sure the zone layer has the combined YL_TS key field.
            if not arcpy.ListFields(in_zone_feature, "YL_TS"):
                # Add the field when it is missing.
                arcpy.management.AddField(in_zone_feature, "YL_TS", "TEXT")
            # Compute the YL_TS value as "<YL>_<TS>".
            arcpy.management.CalculateField(in_zone_feature,"YL_TS","!YL! + '_' + !TS!","PYTHON3")
            # 2. Zonal statistics (MEAN) of the raster per YL_TS zone.
            mean_table = arcpy.sa.ZonalStatisticsAsTable(
                in_zone_feature, "YL_TS", in_value_raster, out_table_mean, "DATA", "MEAN"
            )
            # 2.1 Add the soil-type fields and split YL_TS back into YL / TS.
            arcpy.management.AddFields(
                out_table_mean,
                [["YL", "TEXT"],["TS", "TEXT"]],
            )
            # NOTE(review): splitting on '_' assumes neither YL nor TS contains an underscore.
            arcpy.management.CalculateField(mean_table, "YL", "!YL_TS!.split('_')[0]", "PYTHON3")
            arcpy.management.CalculateField(mean_table, "TS", "!YL_TS!.split('_')[1]", "PYTHON3")
        # Generate the area-statistics Excel report.
        final_dataframe, soil_structure = process_data_final(gdb_path, out_table_area, sample_table_name)
        # Total acidified area per group, derived from target_areas_df.
        each_acid_area = get_acid_area_by_group(target_areas_df)
        print(f"容县土壤类型图斑总 acid 总面积(亩):{each_acid_area}")
        # Apply the area adjustment when target areas are available.
        # NOTE(review): the truthiness test presumes each_acid_area is a dict/list, not a DataFrame - confirm.
        if each_acid_area:
            adjusted_dataframe = apply_adjustment_by_each_level(final_dataframe, each_acid_area)
            print("使用平差值进行修正!")
            write_to_excel(adjusted_dataframe, soil_structure, output_excel_path)
        else:
            print("未使用平差值进行修正!")
            write_to_excel(final_dataframe, soil_structure, output_excel_path)
        # Generate the mean-statistics Excel report (written next to the area report with a "_mean" suffix).
        final_mean_dataframe = process_data_for_table5_5(gdb_path, out_table_mean, sample_table_name)
        write_to_excel_table5_5(final_mean_dataframe, output_excel_path.replace(".xlsx", "_mean.xlsx"))
        # adjusted_dataframe.to_csv(output_excel_path.replace(".xlsx", "_adjusted.csv"), index=False)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
import sys
import arcpy
from pathlib import Path
sys.path.append(str(Path(__file__).parent))
from tools.config.arcgis_field_cal_code import codeblock_dltb_ejdl, codeblock_dltb_yjdl
def export_to_points(ph_points, dltb_features, trlx_features, xzq_features, assign_raster, workspace):
    """
    Enrich the historical pH sample points and export them as a table.

    The raster value is extracted to every sample point, ``dPH`` is computed
    as ``RASTERVALU - PH``, and the points are spatially joined in turn with
    the soil-type, district and land-use layers. The result is exported to
    the table "历史样点PH信息_Table" in ``workspace``.

    Args:
        ph_points: historical pH sample point features.
        dltb_features: land-use polygons (gets EJDL / YJDL / YJDLBM computed).
        trlx_features: soil-type polygons (contributes YL, TS).
        xzq_features: district polygons (contributes XZQMC).
        assign_raster: raster whose values are extracted to the points.
        workspace: ArcPy workspace that receives the output table.

    Returns:
        None. ArcPy and other errors are printed, not raised; in-memory
        intermediates are always deleted.
    """
    arcpy.env.workspace = workspace
    arcpy.env.overwriteOutput = True

    # Layers joined onto the sample points, in this order.
    join_features_list = [trlx_features, xzq_features, dltb_features]
    final_output_fc = "历史样点PH信息_Table"
    target_features = "in_memory/temp_sample_raster"
    # Created BEFORE the try block: the cleanup in `finally` must be able to
    # run even when the very first geoprocessing call fails.
    temp_outputs = [target_features]

    try:
        print("开始处理赋值样点PH信息...")
        # Extract the raster value to every historical pH sample point.
        arcpy.sa.ExtractValuesToPoints(
            in_point_features=ph_points,
            in_raster=assign_raster,
            out_point_features=target_features,
            interpolate_values="NONE",
            add_attributes="VALUE_ONLY"
        )
        print("开始计算地类一二级类别...")
        # Best effort: derive the land-use classes and the pH difference.
        try:
            arcpy.management.CalculateField(dltb_features, "EJDL", "calculate_ejdl(!DLBM!,!DLMC!)", "PYTHON3", codeblock_dltb_ejdl)
            arcpy.management.CalculateField(dltb_features, "YJDL", "calculate_yjdl(!DLBM!)", "PYTHON3", codeblock_dltb_yjdl)
            arcpy.management.CalculateField(dltb_features, "YJDLBM", "!DLBM![:2]", "PYTHON3")
            # dPH is always raster value minus sample pH, whatever the raster
            # is called (a name-based sign switch was disabled by the author).
            arcpy.management.CalculateField(target_features, "dPH", "!RASTERVALU!-!PH!", "PYTHON3", field_type="DOUBLE")
        except Exception as e:
            print(e)

        # Fields to keep from each layer.
        fields_to_keep = {
            target_features: ["PH", "RASTERVALU", "dPH"],
            trlx_features: ["YL", "TS"],
            xzq_features: ["XZQMC"],
            dltb_features: ["YJDL", "EJDL"]
        }
        print("开始配置字段映射...")
        # The join target starts as the extracted points and is replaced by
        # the output of every successful join.
        current_target = target_features
        # Copy, so extending the running list does not mutate fields_to_keep.
        retained_fields = list(fields_to_keep.get(target_features, []))

        for i, join_features in enumerate(join_features_list):
            print(f"\n--- 开始处理第 {i+1} 个连接图层: {join_features} ---")
            if not arcpy.Exists(join_features):
                print(f"警告: 连接图层 '{join_features}' 不存在,将跳过此连接。")
                continue

            field_mappings = arcpy.FieldMappings()
            # a. Carry over the fields already present on the current target.
            for field_name in retained_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(current_target, field_name)
                    field_mappings.addFieldMap(field_map)
                except Exception:
                    # The field was not produced by an earlier step.
                    print(f"注意: 在图层 '{current_target}' 中未找到字段 '{field_name}',可能在之前的步骤中已被跳过。")

            # b. Add the new fields contributed by this join layer.
            fields_from_current_join = fields_to_keep.get(join_features, [])
            for field_name in fields_from_current_join:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"  # first match wins for every joined field
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

            # Nothing to map: skip this join entirely.
            if field_mappings.fieldCount == 0:
                print(f"警告: 对于连接图层 '{join_features}' 没有有效的字段可以添加,跳过此连接。")
                continue

            # Intermediate output of this join, kept in the in_memory workspace.
            temp_output = f"in_memory/temp_join_{i}"
            temp_outputs.append(temp_output)
            print(f"执行空间连接: '{current_target}' + '{join_features}' -> '{temp_output}'")
            arcpy.analysis.SpatialJoin(
                target_features=current_target,
                join_features=join_features,
                out_feature_class=temp_output,
                join_operation="JOIN_ONE_TO_ONE",
                join_type="KEEP_ALL",
                field_mapping=field_mappings,
                match_option="INTERSECT"
            )
            # The next iteration joins onto this result.
            current_target = temp_output
            retained_fields.extend(fields_from_current_join)
            print(f"连接成功。目前已保留的字段: {retained_fields}")

        # --- Save the final result ---
        if arcpy.Exists(current_target):
            print(f"\n所有连接完成。将最终结果 '{current_target}' 保存为 '{final_output_fc}'...")
            arcpy.conversion.ExportTable(current_target, final_output_fc)
            print("最终结果已保存。")
            # Report the fields of the exported table.
            output_fields = [f.name for f in arcpy.ListFields(final_output_fc)]
            print(f"最终输出的字段为: {output_fields}")
        else:
            print("警告: 没有任何连接操作成功执行,未生成最终输出。")
    except arcpy.ExecuteError:
        print("\n--- ArcPy 执行错误 ---")
        print(arcpy.GetMessages(2))
    except Exception as e:
        print("\n--- 发生未预料的错误 ---")
        print(e)
    finally:
        # Remove every intermediate dataset that was actually created.
        print("\n开始清理临时文件...")
        for temp_file in temp_outputs:
            if arcpy.Exists(temp_file):
                arcpy.management.Delete(temp_file)
                print(f"已删除临时文件: {temp_file}")
        print("清理完成。")

View File

@@ -0,0 +1,641 @@
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.arcgis_field_cal_code import codeblock_cal_shfj
from tools.core.utils.excel_utils import ExcelStyleUtils
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# --- 2. 辅助函数 ---
# 等级计算
def get_acidification_degree(delta_ph):
    """Map a ΔpH value to its acidification-degree label.

    Missing values and an exact 0 yield "-". Otherwise the label is chosen
    by the first lower bound that ``delta_ph`` strictly exceeds; anything at
    or below 0.1 (negative values included) is labelled "其他".
    """
    if pd.isna(delta_ph) or delta_ph == 0:
        return "-"
    # Lower bounds in descending order: the first one exceeded decides.
    graded_bounds = (
        (1.0, "重度酸化"),
        (0.5, "中度酸化"),
        (0.3, "轻度酸化"),
        (0.1, "弱酸化"),
    )
    for lower_bound, label in graded_bounds:
        if delta_ph > lower_bound:
            return label
    return "其他"
# --- 3. 数据处理与分析 均值---
def process_data_for_table5_7(gdb_path, mean_table_name, sample_table_name):
    """
    Assemble the per-district ΔpH means for table 5.7.

    Reads the sample table (only rows matching ``dPH>0.3``) and the zonal
    mean table from ``gdb_path``, averages the sample dPH per ``XZQMC``,
    joins both sources on ``XZQMC`` and labels each mean with
    ``get_acidification_degree``.

    Returns:
        DataFrame sorted by ``XZQMC`` with the columns 样点均值, 制图均值,
        制图样点数, 酸化程度_样本 and 酸化程度_制图.
    """
    print("开始处理数据...")

    def _normalise(frame, key_columns):
        # Trim the key columns, turn textual null markers into NaN and drop
        # the rows whose key columns are missing.
        for name in key_columns:
            frame[name] = frame[name].astype(str).str.strip()
        frame.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        frame.dropna(subset=key_columns, inplace=True)
        return frame

    # --- a. Sample points: mean dPH per district ---
    print("--> 步骤1: 计算样点均值...")
    raw_samples = arcpy.da.TableToNumPyArray(
        os.path.join(gdb_path, sample_table_name),
        ['XZQMC', 'YJDL', 'EJDL', 'dPH'],
        'dPH>0.3',
        skip_nulls=False,
    )
    samples = _normalise(pd.DataFrame(raw_samples), ['XZQMC', 'YJDL', 'EJDL'])
    sample_means = (
        samples.groupby(['XZQMC'])['dPH']
        .mean()
        .reset_index()
        .rename(columns={'dPH': '样点均值'})
    )
    print("样点均值计算完成。")

    # --- b. Mapped data: zonal mean and cell count per district ---
    print("--> 步骤2: 获取制图均值和样点数...")
    raw_map = arcpy.da.TableToNumPyArray(
        os.path.join(gdb_path, mean_table_name),
        ['XZQMC', 'MEAN', 'COUNT'],
        skip_nulls=False,
    )
    map_stats = _normalise(pd.DataFrame(raw_map), ['XZQMC'])
    map_stats = map_stats.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'})
    print("制图数据获取完成。")

    # --- c. Union of districts, then attach both statistics ---
    print("--> 步骤3: 合并数据...")
    districts = (
        pd.concat([sample_means[['XZQMC']], map_stats[['XZQMC']]])
        .drop_duplicates()
        .reset_index(drop=True)
    )
    combined = pd.merge(districts, sample_means, on=['XZQMC'], how='left')
    combined = pd.merge(combined, map_stats, on=['XZQMC'], how='left')

    # --- d. Label both means; no row is filtered out here ---
    print("--> 步骤4: 计算酸化程度...")
    combined['酸化程度_样本'] = combined['样点均值'].apply(get_acidification_degree)
    combined['酸化程度_制图'] = combined['制图均值'].apply(get_acidification_degree)
    combined = combined.sort_values(['XZQMC'])
    print("数据处理流程完成!")
    return combined
# --- 4. Excel 制表 均值---
def write_to_excel_table5_7(df, output_path):
    """
    Write the per-district ΔpH means (table 5.7) to a formatted Excel file.

    Only rows whose sample or mapped degree is one of the listed levels are
    written. A mean is shown only when it is present and greater than 0.3;
    otherwise both the mean and its degree are rendered as "-". Nothing is
    written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")

    book = Workbook()
    sheet = book.create_sheet("Mysheet", 0)
    sheet.title = "不同土地利用类型pH变化统计"

    # --- Header ---
    sheet.merge_cells('A1:A2')
    sheet['A1'] = '乡镇/街道'
    sheet.merge_cells('B1:E1')
    sheet['B1'] = 'ΔpH'
    for address, caption in (('B2', '样点均值'), ('C2', '酸化程度'),
                             ('D2', '制图均值'), ('E2', '酸化程度')):
        sheet[address] = caption

    def put_pair(row_number, first_column, mean_value, degree):
        # Write a (mean, degree) pair, or two dashes when the mean is hidden.
        visible = pd.notna(mean_value) and mean_value > 0.3
        sheet.cell(row=row_number, column=first_column).value = f"{mean_value:.2f}" if visible else "-"
        sheet.cell(row=row_number, column=first_column + 1).value = degree if visible else "-"

    # --- Data rows ---
    shown_levels = ["弱酸化", "轻度酸化", "中度酸化", "重度酸化", "其他"]
    keep = df['酸化程度_样本'].isin(shown_levels) | df['酸化程度_制图'].isin(shown_levels)
    rows_to_write = df[keep].copy()

    next_row = 3
    for _, record in rows_to_write.iterrows():
        print("正在写入一级地类...")
        sheet.cell(row=next_row, column=1).value = record['XZQMC']
        put_pair(next_row, 2, record.get('样点均值'), record.get('酸化程度_样本', '-'))
        put_pair(next_row, 4, record.get('制图均值'), record.get('酸化程度_制图', '-'))
        next_row += 1

    # --- Styling and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    last_column = get_column_letter(sheet.max_column)
    if next_row > 1:
        ExcelStyleUtils.set_style(sheet, f'A1:{last_column}{next_row-1}')
        ExcelStyleUtils.set_style(sheet, f'A1:{last_column}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(sheet)

    # --- Save ---
    book.save(output_path)
    print("Excel 报告生成成功!")
# --- 2. 数据处理与分析 面积 各乡镇---
def process_data_for_table5_4(gdb_path, area_table_name, target_area_dict):
    """
    Compute the adjusted acidification area per district for table 5.4.

    The raw areas (square metres, converted to mu with the factor 0.0015)
    are scaled per SHFJ class so that every class total matches the total
    given in ``target_area_dict``; the result is pivoted to one row per
    ``XZQMC`` with ``制图面积_亩_<level>`` and ``面积占比_<level>`` columns.

    Args:
        gdb_path: geodatabase containing the area table.
        area_table_name: table with XZQMC, SHFJ and AREA fields.
        target_area_dict: DataFrame (despite the name) with ``SHFJ`` and
            ``AREA_MU`` columns holding the target areas.

    Returns:
        DataFrame with one row per district, including ``酸化面积合计_亩``;
        empty when the area table has no usable rows.
    """
    print("【最终修正版 v2】开始处理数据...")

    def clean_df(df, columns):
        # Trim key columns, map textual null markers to NaN, drop incomplete rows.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Area statistics ---
    print("--> 步骤1: 独立统计面积数据...")
    area_table_path = os.path.join(gdb_path, area_table_name)
    df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['XZQMC', 'SHFJ', 'AREA'], skip_nulls=False))
    df_area = clean_df(df_area, ['XZQMC'])
    df_final = pd.DataFrame()
    if not df_area.empty:
        # Adjustment factor per class = target total / raw total (both in mu).
        target_totals = target_area_dict.groupby('SHFJ')['AREA_MU'].sum()
        original_totals = df_area.groupby('SHFJ')['AREA'].sum() * 0.0015
        factors = target_totals.reindex(original_totals.index) / original_totals
        # A class without a target (NaN) or with zero raw area (inf/NaN) is
        # left unadjusted instead of raising IndexError or producing inf.
        factors = factors.replace([np.inf, -np.inf], np.nan).fillna(1.0)
        factor_df = factors.rename('平差系数').reset_index()

        df_sh_area = df_area.merge(factor_df[['SHFJ', '平差系数']], on='SHFJ')
        df_sh_area['制图面积_亩'] = df_sh_area['AREA'] * 0.0015 * df_sh_area['平差系数']
        # Share of each class within its district.
        ts_total_area = df_sh_area.groupby(['XZQMC'])['制图面积_亩'].transform('sum')
        df_sh_area['面积占比'] = (df_sh_area['制图面积_亩'] / ts_total_area) * 100
        df_area_stats = df_sh_area.pivot_table(
            index=['XZQMC'], columns='SHFJ', values=['制图面积_亩', '面积占比'], fill_value=0
        ).reset_index()
        # Flatten the (value, class) column MultiIndex to "value_class".
        df_area_stats.columns = [f'{col[0]}_{col[1]}'.strip('_') if col[1] else col[0] for col in df_area_stats.columns]
        df_final = df_area_stats

    print("--> 步骤2: 计算酸化面积合计...")
    # Area columns that count as "acidified".
    acidic_area_cols = [
        '制图面积_亩_轻度酸化',
        '制图面积_亩_中度酸化',
        '制图面积_亩_重度酸化'
    ]
    # Missing level columns are created with 0 so the sum below always works.
    for col in acidic_area_cols:
        if col not in df_final.columns:
            df_final[col] = 0
    df_final['酸化面积合计_亩'] = df_final[acidic_area_cols].sum(axis=1)

    # --- Final clean-up ---
    df_final.fillna(0, inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 3. Excel 制表 面积---
def write_to_excel_table5_4(df, output_path):
    """
    Write the per-district acidification areas (table 5.4) to Excel.

    One row per ``XZQMC`` (sorted), two columns (area in mu, share) for each
    of the five levels, and a final column with the acidified-area total.
    Non-positive areas are rendered as "-". For an empty ``df`` a workbook
    containing only a notice is saved instead.
    """
    sheet_title = "不同乡镇酸化面积统计"

    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        empty_book = Workbook()
        empty_sheet = empty_book.create_sheet("Mysheet", 0)
        empty_sheet.title = sheet_title
        empty_sheet['A1'] = "没有有效的统计数据。"
        empty_book.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    book = Workbook()
    sheet = book.create_sheet("Mysheet", 0)
    sheet.title = sheet_title

    # --- Header ---
    sheet.merge_cells('A1:A2')
    sheet['A1'] = '乡镇/街道'
    levels = ['弱酸化', '轻度酸化', '中度酸化', '重度酸化', '其他']
    captions = ['弱酸化(0.1<ΔpH≤0.3)', '轻度酸化(0.3<ΔpH≤0.5)', '中度酸化(0.5<ΔpH≤1.0)', '重度酸化(ΔpH>1.0)', '其他(未酸化)']
    for position, caption in enumerate(captions):
        left = 2 + 2 * position
        sheet.merge_cells(start_row=1, start_column=left, end_row=1, end_column=left + 1)
        sheet.cell(row=1, column=left).value = caption
        sheet.cell(row=2, column=left).value = '面积/亩'
        sheet.cell(row=2, column=left + 1).value = '占比/%'

    # Column holding the acidified-area total, right after the level columns.
    total_col = 2 + 2 * len(captions)
    sheet.merge_cells(start_row=1, start_column=total_col, end_row=2, end_column=total_col)
    sheet.cell(row=1, column=total_col).value = '酸化面积合计'

    # --- Data rows ---
    ordered = df.sort_values('XZQMC').reset_index(drop=True)
    for row_number, (_, record) in enumerate(ordered.iterrows(), start=3):
        sheet.cell(row=row_number, column=1).value = record['XZQMC']
        for position, level in enumerate(levels):
            left = 2 + 2 * position
            area = record.get(f'制图面积_亩_{level}', 0)
            share = record.get(f'面积占比_{level}', 0)
            has_area = area > 0
            sheet.cell(row=row_number, column=left).value = f"{area:.0f}" if has_area else "-"
            sheet.cell(row=row_number, column=left + 1).value = f"{share:.2f}%" if has_area else "-"
        acidified_total = record.get('酸化面积合计_亩', 0)
        sheet.cell(row=row_number, column=total_col).value = (
            f"{acidified_total:.0f}" if acidified_total > 0 else "-"
        )

    # --- Styling and column widths ---
    last_row = 2 + len(ordered)
    header_font = Font(name='等线', size=11, bold=True)
    last_column = get_column_letter(sheet.max_column)
    ExcelStyleUtils.set_style(sheet, f'A1:{last_column}{last_row}')
    ExcelStyleUtils.set_style(sheet, f'A1:{last_column}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(sheet)

    # --- Save ---
    book.save(output_path)
    print("Excel 报告生成成功!")
# 步骤5.3: 生成表5.3 - 总表数据处理
def process_data_for_table5_2(gdb_path, area_table_name, sample_table_name, target_area_dict: pd.DataFrame):
    """
    Build the county-wide summary (table 5.2): samples and area per level.

    Sample points are binned by dPH into the five levels; mapped areas are
    converted to mu (factor 0.0015), scaled per SHFJ class to the totals in
    ``target_area_dict`` and summed per level.

    Args:
        gdb_path: geodatabase containing both tables.
        area_table_name: table with XZQMC, SHFJ and AREA fields.
        sample_table_name: table with XZQMC and dPH fields.
        target_area_dict: DataFrame with ``SHFJ`` and ``AREA_MU`` columns.

    Returns:
        DataFrame with one row per level and the columns SHFJ, 样点数, dPH
        (mean), 样点占比, 制图面积_亩 and 面积占比. An empty DataFrame is
        returned when neither table has usable rows.
    """
    labels = ["其他", "弱酸化", "轻度酸化", "中度酸化", "重度酸化"]

    def clean_df(df, columns):
        # Trim key columns, map textual null markers to NaN, drop incomplete rows.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    print("--> 步骤1: 建立统一的层级结构...")
    sample_table_path = os.path.join(gdb_path, sample_table_name)
    area_table_path = os.path.join(gdb_path, area_table_name)

    # --- b. Sample statistics ---
    print("--> 步骤2: 独立统计样点数据...")
    df_samples = pd.DataFrame(arcpy.da.TableToNumPyArray(sample_table_path, ['XZQMC', 'dPH'], skip_nulls=False))
    df_samples = clean_df(df_samples, ['XZQMC'])
    sample_counts = None
    if not df_samples.empty:
        bins = [-np.inf, 0.1, 0.3, 0.5, 1.0, np.inf]
        df_samples['SHFJ'] = pd.cut(df_samples['dPH'], bins=bins, labels=labels, right=True)
        # observed=False keeps levels without any sample (count 0).
        by_level = df_samples.groupby(['SHFJ'], observed=False)
        sample_counts = by_level.size().reset_index(name='样点数')
        sample_counts = sample_counts.merge(by_level['dPH'].mean().reset_index(), on='SHFJ')
        ts_total_samples = sample_counts['样点数'].sum()
        sample_counts['样点占比'] = (sample_counts['样点数'] / ts_total_samples) * 100

    # --- c. Area statistics ---
    print("--> 步骤3: 独立统计面积数据...")
    df_area = pd.DataFrame(arcpy.da.TableToNumPyArray(area_table_path, ['XZQMC', 'SHFJ', 'AREA'], skip_nulls=False))
    df_area = clean_df(df_area, ['XZQMC'])
    df_area_counts = None
    if not df_area.empty:
        # Adjustment factor per class = target total / raw total (both in mu).
        target_totals = target_area_dict.groupby('SHFJ')['AREA_MU'].sum()
        original_totals = df_area.groupby('SHFJ')['AREA'].sum() * 0.0015
        factors = target_totals.reindex(original_totals.index) / original_totals
        # A class without a target or with zero raw area stays unadjusted
        # instead of raising IndexError or producing inf.
        factors = factors.replace([np.inf, -np.inf], np.nan).fillna(1.0)
        factor_df = factors.rename('平差系数').reset_index()

        df_sh_area = df_area.merge(factor_df[['SHFJ', '平差系数']], on='SHFJ')
        df_sh_area['制图面积_亩'] = df_sh_area['AREA'] * 0.0015 * df_sh_area['平差系数']
        df_area_counts = df_sh_area.groupby(['SHFJ'], observed=False)[['制图面积_亩']].sum().reset_index()
        ts_total_area = df_area_counts['制图面积_亩'].sum()
        df_area_counts['面积占比'] = (df_area_counts['制图面积_亩'] / ts_total_area) * 100

    # Previously an empty input left sample_counts / df_area_counts unbound
    # and the merge below raised NameError; substitute neutral placeholders.
    if sample_counts is None and df_area_counts is None:
        return pd.DataFrame()
    if sample_counts is None:
        sample_counts = pd.DataFrame({'SHFJ': labels, '样点数': 0, 'dPH': np.nan, '样点占比': 0.0})
    if df_area_counts is None:
        df_area_counts = pd.DataFrame({
            'SHFJ': pd.Series(dtype=object),
            '制图面积_亩': pd.Series(dtype=float),
            '面积占比': pd.Series(dtype=float),
        })

    df_final = pd.merge(sample_counts, df_area_counts, on=['SHFJ'], how='left')
    # --- d. Final clean-up: levels without area or samples become 0 ---
    df_final.fillna(0, inplace=True)
    return df_final
# --- 3. Excel 制表 总表---
def write_to_excel_table5_2(df, df_mean, output_path):
    """
    Write the county-wide summary (table 5.2) to a formatted Excel file.

    Rows 3-7 list the five levels with sample count/share and area/share;
    row 8 holds the totals, rows 9 and 10 the count-weighted ΔpH means taken
    from ``df`` (samples) and ``df_mean`` (mapped values). For an empty
    ``df`` a workbook containing only a notice is saved instead.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        empty_book = Workbook()
        empty_sheet = empty_book.create_sheet("Mysheet", 0)
        empty_sheet['A1'] = "没有有效的统计数据。"
        empty_book.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    book = Workbook()
    sheet = book.create_sheet("Mysheet", 0)
    sheet.title = "行政区酸化程度等级分布及占比"

    # --- Header and fixed labels ---
    merged_labels = (
        ('A1:B1', 'A1', '酸化程度'),
        ('C1:D1', 'C1', '样点统计'),
        ('E1:F1', 'E1', '制图统计'),
        ('A8:B8', 'A8', '总计'),
        ('A9:B9', 'A9', '全县酸化样点ΔpH 均值'),
        ('A10:B10', 'A10', '全县酸化制图ΔpH 均值'),
    )
    for cell_range, anchor, caption in merged_labels:
        sheet.merge_cells(cell_range)
        sheet[anchor] = caption
    second_row = (('A2', '分级'), ('B2', '值域'), ('C2', '数量/个'),
                  ('D2', '占比'), ('E2', '面积/亩'), ('F2', '占比'))
    for address, caption in second_row:
        sheet[address] = caption

    levels = ['弱酸化', '轻度酸化', '中度酸化', '重度酸化', '其他']
    value_ranges = ['0.1<ΔpH≤0.3', '0.3<ΔpH≤0.5', '0.5<ΔpH≤1.0', 'ΔpH>1.0', '未酸化']

    # --- One row per level, starting at row 3 ---
    for row_number, (level, value_range) in enumerate(zip(levels, value_ranges), start=3):
        sheet.cell(row=row_number, column=1).value = level
        sheet.cell(row=row_number, column=2).value = value_range
        matches = df[df['SHFJ'] == level]
        if matches.empty:
            # No statistics for this level: four dashes (columns C-F).
            for column in range(3, 7):
                sheet.cell(row=row_number, column=column).value = "-"
            continue
        record = matches.iloc[0]
        count = record.get('样点数', 0)
        count_share = record.get('样点占比', 0)
        area = record.get('制图面积_亩', 0)
        area_share = record.get('面积占比', 0)
        sheet.cell(row=row_number, column=3).value = f"{count:.0f}" if count > 0 else "-"
        sheet.cell(row=row_number, column=4).value = f"{count_share:.2f}%" if count > 0 else "-"
        sheet.cell(row=row_number, column=5).value = f"{area:.0f}" if area > 0 else "-"
        sheet.cell(row=row_number, column=6).value = f"{area_share:.2f}%" if area > 0 else "-"

    # --- Totals and weighted means ---
    listed = df[df["SHFJ"].isin(levels)]
    weighted_avg = (listed["dPH"] * listed["样点数"]).sum() / listed["样点数"].sum()
    mapped = df_mean[df_mean["酸化程度_制图"].isin(levels)]
    weighted_mean = (mapped["制图均值"] * mapped["制图样点数"]).sum() / mapped["制图样点数"].sum()

    sheet.merge_cells('C9:F9')
    sheet.merge_cells('C10:F10')
    sheet['C8'] = listed['样点数'].sum()
    sheet['D8'] = f"{listed['样点占比'].sum():.2f}%"
    sheet['E8'] = f"{listed['制图面积_亩'].sum():.0f}"
    sheet['F8'] = f"{listed['面积占比'].sum():.2f}%"
    sheet['C9'] = f"{weighted_avg:.2f}"
    sheet['C10'] = f"{weighted_mean:.2f}"

    # --- Styling and column widths ---
    header_font = Font(name='宋体', size=11)
    ExcelStyleUtils.set_style(sheet, 'A1:F10')
    ExcelStyleUtils.set_style(sheet, 'A1:F2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(sheet)

    # --- Save ---
    book.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, xzq_features, ph_features, dltb_features, sh_ph_tif, output_path, target_areas_dict: dict):
    """
    Build the district-level acidification reports.

    Steps performed, in order:
      1. Intersect the land-use layer with the reclassified pH polygons into
         "最小面积统计单元" (only if that feature class does not exist yet).
      2. Cross-tabulate district x SHFJ areas and compute the zonal mean of
         the raster per district (each only if its table does not exist).
      3. Scale the areas per first-level land-use class (YJDL) to the totals
         in ``target_areas_dict`` and write the three Excel reports.

    Args:
        gdb_path: geodatabase used as the ArcPy workspace.
        xzq_features: district polygons (zone features, field XZQMC).
        ph_features: reclassified acidification pH polygons (field gridcode).
        dltb_features: land-use polygons.
        sh_ph_tif: value raster for the zonal statistics.
        output_path: directory that receives the Excel files.
        target_areas_dict: target area (mu) keyed by land-use code
            ('01', '02', '03', '04', '12').

    Returns:
        DataFrame with YJDL, SHFJ, Shape_Area, 平差系数 and AREA_MU on
        success; None when an error occurred (the error is printed).
    """
    try:
        # --- Configuration ---
        output_excel_path = os.path.join(output_path, "乡镇街道酸化统计表.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        sample_table_name = "历史样点PH信息_Table"
        in_zone_feature = xzq_features
        in_class_feature = ph_features
        dltb_class_feature = dltb_features
        in_value_raster = sh_ph_tif
        out_feature_class = "最小面积统计单元"
        out_table_area = r"行政区划_酸化面积表"
        out_table_mean = r"行政区划_酸化均值表"
        print("开始处理数据...")

        if not arcpy.Exists(out_feature_class):
            # Best effort: (re)compute the SHFJ class field first.
            try:
                arcpy.management.CalculateField(in_class_feature, "SHFJ", "calculate_shfj(!gridcode!)", "PYTHON3", codeblock_cal_shfj)
            except Exception as e:
                print(f"计算SHFJ字段时发生错误: {e}")
            arcpy.analysis.Intersect(
                in_features=[dltb_class_feature, in_class_feature],
                out_feature_class=out_feature_class,
                join_attributes="ALL",
                output_type="INPUT"
            )
        if not arcpy.Exists(out_table_area):
            # 1. Cross-tabulate district against SHFJ class areas.
            arcpy.analysis.TabulateIntersection(
                in_zone_feature,
                ["XZQMC"],
                out_feature_class,
                out_table_area,
                "SHFJ",
                out_units="SQUARE_METERS",
            )
        if not arcpy.Exists(out_table_mean):
            # 2. Zonal mean of the raster per district.
            arcpy.sa.ZonalStatisticsAsTable(
                in_zone_feature, "XZQMC", in_value_raster, out_table_mean, "DATA", "MEAN"
            )

        # Everything below needs the intersected units; fail with a clear
        # message rather than a NameError on df_with_factors later on.
        if not arcpy.Exists(out_feature_class):
            raise RuntimeError(f"未找到要素类 '{out_feature_class}',无法计算平差面积")

        # Adjusted area per (YJDL, SHFJ): scale each land-use class to its target.
        df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_feature_class, ["YJDL", "SHFJ", "Shape_Area"]))
        df_area = df.groupby(["YJDL", "SHFJ"]).agg({"Shape_Area": "sum"}).reset_index()
        yjdl_area = df_area.groupby(['YJDL'])['Shape_Area'].sum().reset_index()
        landuse_types = {'耕地': '01', '园地': '02', '林地': '03', '草地': '04', '其他': '12'}
        adjustment_factors = []
        for yjdl, shape_area in zip(yjdl_area['YJDL'], yjdl_area['Shape_Area']):
            original_total = shape_area * 0.0015  # square metres -> mu
            # An unknown land-use class or a missing target keeps the
            # original total, i.e. factor 1 (previously a KeyError aborted
            # the whole run).
            target_total = target_areas_dict.get(landuse_types.get(yjdl), original_total)
            # A zero-area class cannot be scaled; leave it unadjusted.
            adjustment_factor = target_total / original_total if original_total else 1.0
            adjustment_factors.append({
                'YJDL': yjdl,
                '平差系数': adjustment_factor
            })
        factor_df = pd.DataFrame(adjustment_factors, columns=['YJDL', '平差系数'])
        df_with_factors = df_area.merge(factor_df[['YJDL', '平差系数']], on='YJDL')
        df_with_factors['AREA_MU'] = df_with_factors['Shape_Area'] * df_with_factors['平差系数'] * 0.0015

        # Area report per district.
        final_area_dataframe = process_data_for_table5_4(gdb_path, out_table_area, df_with_factors)
        write_to_excel_table5_4(final_area_dataframe, output_excel_path)
        # Mean report per district ("_mean" suffix).
        final_mean_dataframe = process_data_for_table5_7(gdb_path, out_table_mean, sample_table_name)
        write_to_excel_table5_7(final_mean_dataframe, output_excel_path.replace(".xlsx", "_mean.xlsx"))
        # County-wide summary report ("_total" suffix).
        final_dataframe = process_data_for_table5_2(gdb_path, out_table_area, sample_table_name, df_with_factors)
        write_to_excel_table5_2(final_dataframe, final_mean_dataframe, output_excel_path.replace(".xlsx", "_total.xlsx"))
        return df_with_factors
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
"""
输入重分类后栅格转面要素类、乡镇边界面要素类、地类图斑要素类;
按一级地类统计土壤属性面积 和 按乡镇统计土壤属性面积;
将统计结果导出为Excel表格
将Excel表格转换为jpg图片
"""
import json
import os
from pathlib import Path
import sys
import traceback
import argparse
import win32com.client as win32
import pythoncom
import time
sys.path.append(str(Path(__file__).parent))
def parse_arguments():
    """Parse the command line and return the ``area_stat_settings`` section.

    The only option is ``--settings_path`` (required): the path of a UTF-8
    JSON settings file.

    Returns:
        dict: value of the ``"area_stat_settings"`` key of that file, or an
        empty dict when the key is absent.
    """
    cli = argparse.ArgumentParser(description="将Excel表格转换为jpg图片")
    cli.add_argument("--settings_path", required=True, help="配置文件路径")
    parsed = cli.parse_args()
    if not parsed.settings_path:
        # Only reachable when an empty string is passed explicitly.
        print_status("错误: 未找到有效配置文件")
        sys.exit(1)
    with open(parsed.settings_path, 'r', encoding="utf-8") as handle:
        loaded = json.load(handle)
    return loaded.get("area_stat_settings", {})
def print_status(message):
    """Write one progress line to stdout for the GUI and flush immediately.

    The emitted line is ``STATUS:<message>`` (no space after the colon).
    """
    sys.stdout.write(f"STATUS:{message}\n")
    sys.stdout.flush()  # the GUI reads stdout live, so do not leave it buffered
def print_result(success, output_path="", error_message=""):
    """Write the final task result to stdout for the GUI and flush.

    Emits ``RESULT:True|<output_path>|`` on success and
    ``RESULT:False||<error_message>`` on failure.

    Args:
        success: truthy when the task succeeded.
        output_path: reported only on success.
        error_message: reported only on failure; newlines become spaces and
            carriage returns are dropped so the result stays on one line.
    """
    if not success:
        single_line = error_message.replace('\n', ' ').replace('\r', '')
        line = f"RESULT:False||{single_line}"
    else:
        line = f"RESULT:True|{output_path}|"
    print(line)
    sys.stdout.flush()  # make sure the GUI sees the result right away
def export_excel_to_image(excel_path, sheet_name, output_path, range_address=None):
    """Render a worksheet range to a JPG by automating Excel through COM.

    The range is copied as a bitmap, pasted into a temporary chart object on
    the same sheet, and the chart is exported as an image. The workbook is
    always closed without saving.

    Args:
        excel_path: path of the workbook to open.
        sheet_name: worksheet to export; when it cannot be found the first
            worksheet is used instead.
        output_path: destination image path; its folder is created if needed.
        range_address: optional A1-style range; the sheet's ``UsedRange`` is
            exported when it is omitted or invalid.

    Returns:
        None. Failures are reported with ``print`` and not raised.
    """
    # Check that the workbook exists before touching COM at all.
    if not os.path.exists(excel_path):
        print(f"错误: Excel 文件 '{excel_path}' 不存在。请检查路径。")
        return
    # Excel resolves relative paths against its own working directory, so
    # hand it absolute paths only.
    excel_path = os.path.abspath(excel_path)
    output_path = os.path.abspath(output_path)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    pythoncom.CoInitialize()
    # Bound up front so the cleanup below never touches an undefined name
    # when Dispatch() or Open() fails.
    excel = None
    workbook_obj = None
    try:
        # 1. Start Excel hidden and with all prompts suppressed.
        excel = win32.Dispatch("Excel.Application")
        excel.Visible = False
        excel.DisplayAlerts = False
        # 2. Open the workbook.
        workbook_obj = excel.Workbooks.Open(excel_path)
        # 3. Pick the worksheet, falling back to the first one.
        try:
            sheet = workbook_obj.Sheets(sheet_name)
        except Exception:
            print(f"错误: 工作簿 '{os.path.basename(excel_path)}' 中找不到工作表 '{sheet_name}'")
            if workbook_obj.Sheets.Count > 0:
                sheet = workbook_obj.Sheets(1)
                print(f"改为导出第一个工作表 '{sheet.Name}'")
            else:
                raise ValueError("工作簿中没有可用的工作表。")
        # 4. Resolve the range to export.
        if range_address:
            try:
                range_obj = sheet.Range(range_address)
            except Exception:
                print(f"警告: 指定的导出范围 '{range_address}' 无效或不存在,将导出 UsedRange。")
                range_obj = sheet.UsedRange
        else:
            range_obj = sheet.UsedRange
        range_obj.Select()
        # 5. Copy the range as a picture (Format=1 is xlBitmap, Appearance=2
        # is the value the original code passes; kept unchanged).
        range_obj.CopyPicture(Format=1, Appearance=2)
        # 6. Size the temporary chart from the range size in points.
        # NOTE(review): the factors 300/72 and 300/65 are not a unit
        # conversion and differ per axis; they look empirically tuned, so
        # they are preserved exactly.
        chart_width = range_obj.Width * (300/72)
        chart_height = range_obj.Height * (300/65)
        chart_holder = sheet.ChartObjects().Add(0, 0, chart_width, chart_height)
        try:
            temp_chart_obj = chart_holder.Chart
            temp_chart_obj.Paste()
            # 7. Export the chart as the image file.
            temp_chart_obj.Export(output_path, FilterName="JPG")
            print(f"图片已成功导出到 '{output_path}'")
        finally:
            # 8. Remove the temporary chart even when paste/export failed.
            chart_holder.Delete()
    except Exception as e:
        print(f"处理 Excel 时发生错误: {e}")
        print("请确保已安装 Microsoft Excel 应用程序,并且 Excel 文件路径、工作表名称正确。")
    finally:
        # 9. Close the workbook without saving, then quit Excel, on every path.
        if workbook_obj is not None:
            try:
                workbook_obj.Close(False)
            except Exception as e:
                print(f"关闭工作簿时发生错误: {e}")
        if excel is not None:
            try:
                excel.Quit()
            except Exception as e:
                print(f"关闭 Excel 应用程序时发生错误: {e}")
        pythoncom.CoUninitialize()
def main():
    """Convert every ``.xlsx`` file in the configured folder to a JPG.

    The folder is ``batch_output_folder`` from the parsed settings. For each
    workbook the sheet "综合统计表" is exported next to it with a ``.jpg``
    extension. Any error is reported through ``print_status`` and
    ``print_result``.

    The process always exits with status 0; failures are signalled only
    through the ``RESULT:`` line.
    """
    try:
        # 1. Parse arguments.
        params = parse_arguments()
        output_path = params["batch_output_folder"]
        for excel_file in os.listdir(output_path):
            # Skip non-workbooks and the "~$" lock files Excel leaves next to
            # open workbooks (they also end in .xlsx but cannot be opened).
            if not excel_file.endswith(".xlsx") or excel_file.startswith("~$"):
                continue
            # Pause before each conversion (kept from the original, which
            # slept for every directory entry, workbook or not).
            time.sleep(1.5)
            excel_file_path = os.path.join(output_path, excel_file)
            # splitext swaps only the real extension; str.replace would also
            # rewrite a ".xlsx" occurring in the middle of the name.
            jpg_name = os.path.splitext(excel_file)[0] + ".jpg"
            output_jpg_path = os.path.join(output_path, jpg_name)
            export_excel_to_image(excel_file_path, "综合统计表", output_jpg_path)
            print_status(f"已处理文件: {excel_file}")
    except Exception as e:
        error_msg = f"主函数错误: {str(e)}\n{traceback.format_exc()}"
        print_status(error_msg)
        print_result(False, error_message=error_msg)
    finally:
        sys.exit(0)


if __name__ == '__main__':
    print_status("开始执行")
    main()

310
tools/core/export_layout.py Normal file
View File

@@ -0,0 +1,310 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
导出布局脚本
此脚本可以独立运行用于导出布局不依赖于PyQt6线程
"""
import os
import sys
import json
import arcpy
import argparse
def log(message):
    """Write ``message`` as one line to stdout and flush it immediately."""
    sys.stdout.write(f"{message}\n")
    sys.stdout.flush()  # keep output live for whoever reads this process
def parse_arguments():
    """Parse the command line of the layout export script.

    ``--aprx_file_list`` is normalised to a Python list. Accepted spellings:

    * a JSON array, e.g. ``["a.aprx", "b.aprx"]``;
    * a bracketed list that is not valid JSON (e.g. the ``str()`` of a Python
      list, or brackets whose quotes were eaten by a shell) - brackets and
      surrounding quotes are stripped and the rest is split on commas;
    * a comma separated string;
    * a single path.

    When the option is omitted the list is empty.

    Returns:
        argparse.Namespace: parsed options, with ``aprx_file_list`` as a list.
    """
    parser = argparse.ArgumentParser(description='导出布局')
    parser.add_argument('--mode', choices=['single', 'batch'], default='single', help='导出模式')
    parser.add_argument('--aprx_file_list', help='ArcGIS Pro工程文件路径')
    parser.add_argument('--input_aprx_folder', help='批量模式下的工程文件夹路径')
    parser.add_argument('--output_image_path', required=True, help='输出路径')
    parser.add_argument('--export_format', default='PDF', help='导出格式')
    parser.add_argument('--resolution', type=int, default=300, help='分辨率(DPI)')
    parser.add_argument('--use_multiprocessing', action='store_true', help='是否使用多进程')
    parser.add_argument('--process_count', type=int, default=2, help='进程数')
    parser.add_argument('--image_force_regenerate', help='输出文件名')
    args = parser.parse_args()

    raw = args.aprx_file_list
    if not raw:
        # Option omitted (or empty): previously this crashed on
        # None.startswith(); an empty list lets the caller report it.
        args.aprx_file_list = []
        return args

    bracketed = raw.startswith('[') and raw.endswith(']')
    if bracketed:
        try:
            args.aprx_file_list = json.loads(raw)
            return args
        except json.JSONDecodeError:
            # Not real JSON: fall through and split it by hand instead of
            # returning the whole "[...]" text as one bogus path.
            pass

    if bracketed or ',' in raw:
        pieces = raw.strip("[]").split(',')
        if bracketed:
            # Items of a pseudo-list usually still carry their quotes.
            items = [piece.strip().strip("'\"") for piece in pieces]
            items = [item for item in items if item]
        else:
            items = [piece.strip() for piece in pieces]
        args.aprx_file_list = items
    else:
        args.aprx_file_list = [raw]
    return args
def get_file_extension(format_name):
    """Return the file extension (with leading dot) for an export format.

    The lookup is case-insensitive. JPG and JPEG both give ``.jpg``, TIFF
    gives ``.tif``, and any unknown format falls back to ``.pdf``.
    """
    normalized = format_name.upper()
    if normalized in ("JPG", "JPEG"):
        return ".jpg"
    if normalized == "TIFF":
        return ".tif"
    if normalized in ("PDF", "PNG", "EPS", "SVG", "AI"):
        # For these formats the extension is simply the lower-cased name.
        return f".{normalized.lower()}"
    return ".pdf"
def export_layout(params):
    """Export every layout of one ArcGIS Pro project to files.

    Args:
        params (dict): ``aprx_path`` and ``output_path`` are required;
            ``export_format`` (default ``'PDF'``), ``resolution`` (default
            300) and ``output_name`` (default: the project file's base name)
            are optional.

    Returns:
        dict: ``{'exported_files': [...], 'count': n}`` for the layouts that
        exported successfully. A layout that fails is logged and skipped.

    Raises:
        Exception: when the project cannot be opened or has no layout.

    With a single layout the file is ``<output_name><ext>``. With several
    layouts each file is ``<output_name>_<layout name><ext>``; the original
    wrote every layout to the same path, so they overwrote one another.
    """
    aprx = None
    try:
        log(f"开始导出布局...")
        aprx_path = params['aprx_path']
        output_folder = params['output_path']
        export_format = params.get('export_format', 'PDF')
        resolution = params.get('resolution', 300)
        output_name = params.get('output_name', '')
        os.makedirs(output_folder, exist_ok=True)
        try:
            log(f"打开地图文档: {aprx_path}")
            aprx = arcpy.mp.ArcGISProject(aprx_path)  # type: ignore
        except Exception as e:
            raise Exception(f"无法打开地图文档: {str(e)}") from e
        layouts = aprx.listLayouts()
        if not layouts:
            raise Exception("地图文档中没有布局")
        # Default the output name to the project file's base name.
        if not output_name:
            output_name = os.path.splitext(os.path.basename(aprx_path))[0]
        file_ext = get_file_extension(export_format)
        format_key = export_format.upper()
        several_layouts = len(layouts) > 1
        # Layout names are free text; neutralise characters that are not
        # allowed in file names before using them in a path.
        unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
        exported_files = []
        for layout in layouts:
            layout_name = layout.name
            if several_layouts:
                base_name = f"{output_name}_{layout_name.translate(unsafe_chars)}"
            else:
                base_name = output_name
            output_file = os.path.join(output_folder, f"{base_name}{file_ext}")
            log(f"导出布局 {layout_name}{output_file}")
            try:
                if format_key in ("JPG", "JPEG"):
                    layout.exportToJPEG(output_file, resolution=resolution, jpeg_quality=85)
                elif format_key == "PNG":
                    layout.exportToPNG(output_file, resolution=resolution)
                elif format_key == "TIFF":
                    layout.exportToTIFF(output_file, resolution=resolution)
                elif format_key == "EPS":
                    layout.exportToEPS(output_file, resolution=resolution)
                elif format_key == "SVG":
                    layout.exportToSVG(output_file, resolution=resolution)
                else:
                    # PDF, and any format without a dedicated branch.
                    layout.exportToPDF(output_file, resolution=resolution)
                exported_files.append(output_file)
                log(f"成功导出布局 {layout_name}{output_file}")
            except Exception as e:
                # One bad layout must not stop the remaining ones.
                log(f"导出布局 {layout_name} 失败: {str(e)}")
        return {
            'exported_files': exported_files,
            'count': len(exported_files)
        }
    except Exception as e:
        log(f"导出布局失败: {str(e)}")
        raise
    finally:
        # Drop the project reference and clear the workspace cache.
        if aprx is not None:
            del aprx
        arcpy.management.ClearWorkspaceCache()
def batch_export_layout(params):
    """Export the layouts of several projects one after another.

    Args:
        params (dict): ``aprx_files`` (list of project paths) and
            ``output_path`` are required; ``export_format`` (default
            ``'PDF'``) and ``resolution`` (default 300) are optional.

    Returns:
        dict: ``exported_files`` (all exported paths), ``count`` and
        ``success_count`` (number of exported layouts) and ``failed_count``
        (number of project files whose export raised).

    A failing project is logged and counted; processing continues with the
    next one.
    """
    try:
        log(f"开始批量导出布局...")
        project_paths = params['aprx_files']
        target_dir = params['output_path']
        fmt = params.get('export_format', 'PDF')
        dpi = params.get('resolution', 300)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        collected_files = []
        layouts_ok = 0
        files_failed = 0
        for aprx_path in project_paths:
            try:
                stem = os.path.splitext(os.path.basename(aprx_path))[0]
                log(f"\n处理文件: {stem}")
                # Each output is named after its project file.
                outcome = export_layout({
                    'aprx_path': aprx_path,
                    'output_path': target_dir,
                    'export_format': fmt,
                    'resolution': dpi,
                    'output_name': stem
                })
                collected_files.extend(outcome['exported_files'])
                layouts_ok += outcome['count']
                log(f"文件 {stem} 处理完成")
            except Exception as e:
                log(f"处理文件 {os.path.basename(aprx_path)} 失败: {str(e)}")
                files_failed += 1
        log(f"\n批量导出完成")
        log(f"成功: {layouts_ok} 个布局")
        log(f"失败: {files_failed} 个文件")
        return {
            'exported_files': collected_files,
            'count': layouts_ok,
            'success_count': layouts_ok,
            'failed_count': files_failed
        }
    except Exception as e:
        log(f"批量导出布局失败: {str(e)}")
        raise
def export_worker(aprx_path, output_path, export_format, resolution):
    """Pool worker: export one project and never raise.

    Returns:
        tuple: ``(True, aprx_path, result_dict)`` on success, or
        ``(False, aprx_path, error_text)`` when the export raised.
    """
    try:
        # Imported here so each worker process sets up ArcPy itself.
        import arcpy
        arcpy.env.overwriteOutput = True
        job = {
            'aprx_path': aprx_path,
            'output_path': output_path,
            'export_format': export_format,
            'resolution': resolution
        }
        outcome = export_layout(job)
    except Exception as e:
        return (False, aprx_path, str(e))
    return (True, aprx_path, outcome)
def main():
    """Entry point: export one project, or a batch of projects.

    Returns:
        int: process exit code, 0 on success and 1 on failure.

    With exactly one path in ``--aprx_file_list`` that project is exported
    directly. With several paths, ``--input_aprx_folder`` must also be given;
    invalid entries are filtered out and the rest are exported either through
    a process pool (``--use_multiprocessing`` with ``--process_count`` > 1)
    or sequentially.
    """
    try:
        args = parse_arguments()
        file_count = len(args.aprx_file_list)
        if file_count == 1:
            aprx_file = args.aprx_file_list[0]
            if not os.path.exists(aprx_file):
                log(f"所选文件{aprx_file}不存在,请确认")
                return 1
            params = {
                'aprx_path': aprx_file,
                'output_path': args.output_image_path,
                'export_format': args.export_format,
                'resolution': args.resolution
            }
            result = export_layout(params)
            log(f"导出完成,成功导出 {result['count']} 个布局")
            return 0
        elif file_count > 1:
            if not args.input_aprx_folder:
                log("批量导出模式需要指定aprx_folder参数")
                return 1
            aprx_files = []
            valied_files = []
            failed_files = []
            for file in args.aprx_file_list:
                # A usable entry must exist AND be an .aprx file. The old
                # test (`not exists and endswith`) rejected only missing
                # .aprx paths and let every other bad entry through.
                if not os.path.exists(file) or not file.lower().endswith('.aprx'):
                    failed_files.append(os.path.basename(file))
                    continue
                aprx_files.append(file)
                valied_files.append(os.path.basename(file))
            if not aprx_files:
                log(f"在指定文件夹中未找到aprx文件: {args.input_aprx_folder}")
                return 1
            log(f"找到 {len(valied_files)} 个有效aprx文件: {', '.join(valied_files)}\n")
            log(f"{len(failed_files)} 个无效文件: {', '.join(failed_files)}")
            if args.use_multiprocessing and args.process_count > 1 and len(aprx_files) > 1:
                from multiprocessing import Pool
                tasks = [
                    (aprx_file, args.output_image_path, args.export_format, args.resolution)
                    for aprx_file in aprx_files
                ]
                # Never start more workers than there are projects.
                with Pool(min(int(args.process_count), len(tasks))) as p:
                    results = p.starmap(export_worker, tasks)
                for success, aprx_path, result in results:
                    if success:
                        log(f"成功导出布局 {aprx_path}{result['exported_files']}")  # type: ignore
                    else:
                        log(f"导出布局 {aprx_path} 失败: {result}")
                return 0
            else:
                params = {
                    'aprx_files': aprx_files,
                    'output_path': args.output_image_path,
                    'export_format': args.export_format,
                    'resolution': args.resolution
                }
                result = batch_export_layout(params)
                log(f"批量导出完成,成功导出 {result['count']} 个布局")
                return 0
        else:
            log("请选择要处理的aprx文件")
            return 0
    except Exception as e:
        log(f"导出失败: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())

572
tools/core/export_map_v1.py Normal file
View File

@@ -0,0 +1,572 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
导出地图脚本
此脚本可以独立运行,用于导出地图,支持批量导出
"""
import os
import sys
import json
import time
import arcpy
import argparse
from pathlib import Path
from collections import defaultdict
sys.path.append(str(Path(__file__).parent))
from utils import common_utils
def parse_arguments():
    """Parse the command line of the map export script.

    ``--polygon_list`` is normalised to a Python list. Accepted spellings:

    * a JSON array, e.g. ``["AB", "CD"]``;
    * a bracketed list that is not valid JSON (e.g. the ``str()`` of a Python
      list, or brackets whose quotes were eaten by a shell) - brackets and
      surrounding quotes are stripped and the rest is split on commas;
    * a comma separated string;
    * a single layer name.

    Returns:
        argparse.Namespace: parsed options, with ``polygon_list`` as a list.
    """
    parser = argparse.ArgumentParser(description="导出地图工具")
    parser.add_argument("--config_file", required=True, help="配置文件路径")
    parser.add_argument("--county_name", required=True, help="区县名称")
    parser.add_argument("--polygon_list", required=True, help="要导出的图层列表JSON格式字符串")
    parser.add_argument("--template_aprx_file", required=True, help="模板文件路径")
    parser.add_argument("--output_path", required=True, help="输出路径")
    parser.add_argument("--data_source_path", required=True, help="数据源路径")
    parser.add_argument("--symbol_path", required=True, help="符号文件路径")
    parser.add_argument("--force_regenerate", action="store_true", help="强制重新生成工程文件")
    parser.add_argument("--pic_path", required=True, help="图片输入路径")
    args = parser.parse_args()

    raw = args.polygon_list
    bracketed = raw.startswith('[') and raw.endswith(']')
    if bracketed:
        try:
            args.polygon_list = json.loads(raw)
            return args
        except json.JSONDecodeError:
            # Not real JSON: fall through and split it by hand instead of
            # returning the whole "[...]" text as one bogus layer name.
            pass

    if bracketed or ',' in raw:
        pieces = raw.strip("[]").split(',')
        if bracketed:
            # Items of a pseudo-list usually still carry their quotes.
            items = [piece.strip().strip("'\"") for piece in pieces]
            items = [item for item in items if item]
        else:
            items = [piece.strip() for piece in pieces]
        args.polygon_list = items
    else:
        args.polygon_list = [raw]
    return args
def print_status(message):
    """Emit a progress line on stdout for the GUI to display live.

    The emitted line is ``STATUS:<message>`` (no space after the colon).
    """
    status_line = f"STATUS:{message}"
    print(status_line)
    sys.stdout.flush()  # push it out immediately; the GUI reads stdout live
def print_result(success, output_path="", error_message=""):
    """Emit the final task result on stdout for the GUI and flush.

    Success is reported as ``RESULT:True|<output_path>|`` and failure as
    ``RESULT:False||<error_message>``. In the failure text newlines become
    spaces and carriage returns are removed so that the line cannot be split.
    """
    if success:
        print(f"RESULT:True|{output_path}|")
        sys.stdout.flush()
        return
    one_line = error_message.replace('\n', ' ').replace('\r', '')
    print(f"RESULT:False||{one_line}")
    sys.stdout.flush()
def log_arcpy_message(message):
    """Forward one ArcPy geoprocessing message to the status output.

    ``message`` must expose ``type`` and ``message`` attributes. Types
    ``'Message'``, ``'Warning'`` and ``'Error'`` are reported through
    ``print_status``; errors are additionally written to stderr. Any other
    type is ignored.
    """
    severity = message.type
    if severity == 'Error':
        print_status(f"ArcPy错误: {message.message}")
        # Errors are mirrored on stderr as well.
        sys.stderr.write(f"ArcPyError:{message.message}\n")
        sys.stderr.flush()
        return
    if severity == 'Warning':
        print_status(f"ArcPy警告: {message.message}")
        return
    if severity == 'Message':
        print_status(f"ArcPy消息: {message.message}")
def area_statistics_by_field(layer, field_name="GRIDCODE", area_unit="HECTARES"):
    """Sum feature areas of a layer, grouped by the value of one field.

    Args:
        layer: feature layer object; must expose ``isFeatureLayer`` and
            ``dataSource``.
        field_name: field whose values are the grouping keys.
        area_unit: unit string passed to ``geometry.getArea``.

    Returns:
        collections.defaultdict: field value -> summed area. Keys 1 to 5 are
        always present (0.0 when no feature has that value).

    Raises:
        Exception: wrapping any failure (invalid layer, missing field,
            cursor errors), chained to the original exception.

    Areas are geodesic when the data source uses a geographic coordinate
    system and planar otherwise. Features without geometry are skipped.
    """
    area_stats = defaultdict(float, {1: 0.00, 2: 0.00, 3: 0.00, 4: 0.00, 5: 0.00})
    try:
        if not layer or not layer.isFeatureLayer:
            raise ValueError("输入图层无效或不是要素图层")
        if field_name not in [f.name for f in arcpy.ListFields(layer)]:
            raise ValueError(f"字段 '{field_name}' 在图层中不存在")
        desc = arcpy.Describe(layer.dataSource)
        is_geographic = desc.spatialReference.type == "Geographic"
        if is_geographic:
            print_status("图层坐标系为地理坐标系,计算面积可能不准确")
        area_method = "GEODESIC" if is_geographic else "PLANAR"
        with arcpy.da.SearchCursor(layer, ["SHAPE@", field_name]) as cursor:
            for geometry, value in cursor:
                if geometry is None:
                    # A feature with null geometry has no area; skipping it
                    # avoids aborting the whole statistic.
                    continue
                # defaultdict(float) starts unseen keys at 0.0.
                area_stats[value] += geometry.getArea(area_method, area_unit)
        return area_stats
    except Exception as e:
        raise Exception(f"计算面积统计信息失败: {str(e)}") from e
def update_text_elements(layout, config_data, county_name):
    """Update the layout's text elements from one layer's configuration.

    Only two keys of ``config_data`` are used, each naming a text element of
    the layout:

    * ``"项目名称"``: its ``{区县占位符}`` placeholder is replaced by
      ``county_name`` before being written;
    * ``"分析方法"``: written as is.

    Raises:
        IndexError: when the layout has no text element with that name.
    """
    for name, content in config_data.items():
        if name == "项目名称":
            target = layout.listElements("TEXT_ELEMENT", name)[0]
            target.text = content.replace('{区县占位符}', county_name)
        elif name == "分析方法":
            target = layout.listElements("TEXT_ELEMENT", name)[0]
            target.text = content
    print_status(f"文本元素更新成功")
def update_data_source(layer, layer_name, new_data_source):
    """Point ``layer`` at dataset ``layer_name`` inside ``new_data_source``.

    Args:
        layer: layer object whose connection properties are rewritten.
        layer_name: dataset name inside the workspace.
        new_data_source: workspace path; normalised with ``os.path.normpath``.

    Raises:
        ValueError: when the dataset does not exist. Every failure is logged
            through ``print_status`` and re-raised.
    """
    try:
        workspace = os.path.normpath(new_data_source)
        dataset_path = os.path.join(workspace, layer_name)
        if not arcpy.Exists(dataset_path):
            raise ValueError(f"数据源不存在: {dataset_path}")
        # Edit a copy of the connection properties, then swap old for new.
        updated = layer.connectionProperties
        updated["connection_info"]["database"] = workspace
        updated["dataset"] = layer_name
        updated["workspace_factory"] = "File Geodatabase"
        layer.updateConnectionProperties(layer.connectionProperties, updated)
        print_status(f"数据源更新成功")
    except Exception as e:
        print_status(f"更新数据源失败: {str(e)}")
        raise
def update_symbol_system(layer, layer_name, symbol_path):
    """Apply the symbology stored in a layer file to ``layer``.

    Args:
        layer: layer object whose ``symbology`` is replaced.
        layer_name: name used to pick a layer file when ``symbol_path`` is a
            folder.
        symbol_path: either a ``.lyr``/``.lyrx`` file, or a folder that
            contains one whose name includes ``layer_name``.

    Failures are logged through ``print_status`` and not raised (best effort).

    Notes:
        ``ApplySymbologyFromLayer`` did not take effect when called in a loop
        (cause unknown, per the original author), so the symbology is copied
        from ``arcpy.mp.LayerFile`` instead.
    """
    layer_file_exts = ('.lyr', '.lyrx')
    try:
        # A direct .lyrx path used to be rejected although the folder scan
        # accepts .lyrx; both spellings are accepted now, in any letter case.
        if os.path.isfile(symbol_path) and symbol_path.lower().endswith(layer_file_exts):
            symbol_file = symbol_path
        elif os.path.isdir(symbol_path):
            candidates = sorted(
                name for name in os.listdir(symbol_path)
                if name.lower().endswith(layer_file_exts) and layer_name in name
            )
            # Prefer a file named exactly after the layer, so "AB" does not
            # pick up "ABC.lyrx" just because it contains "AB".
            exact = [name for name in candidates if os.path.splitext(name)[0] == layer_name]
            chosen = exact[0] if exact else (candidates[0] if candidates else None)
            if not chosen:
                raise FileNotFoundError(f"符号系统中未找到匹配 {layer_name} 的.lyr文件")
            symbol_file = os.path.join(symbol_path, chosen)
        else:
            raise FileNotFoundError("符号系统路径必须是有效的.lyr文件或文件夹")
        lf = arcpy.mp.LayerFile(symbol_file)  # type: ignore
        layer.symbology = lf.listLayers()[0].symbology
        print_status(f"符号系统更新成功")
    except Exception as e:
        print_status(f"更新符号系统失败: {str(e)}")
# 添加注记
def add_annotation(map_scale, map_obj, label_layer,layer_name, new_data_source):
    """Add an existing annotation feature class to the map.

    The annotation dataset is expected at
    ``<new_data_source>/<layer_name>_GDBAnno``. Every layer of ``map_obj``
    matching ``*GDBAnno`` is removed first, labels of ``label_layer`` are
    switched off, and the annotation is added and moved relative to
    ``label_layer``.

    ``map_scale`` is accepted but not used here. Nothing is raised: failures
    (including a missing annotation dataset) are logged through
    ``print_status`` together with the traceback.
    """
    try:
        if label_layer is None:
            print_status(f"图层对象为空,无法进行标注转注记")
            return
        anno_dataset = os.path.join(new_data_source, f"{layer_name}_GDBAnno")
        try:
            # 1. Remove every annotation layer currently in the map.
            for stale_layer in map_obj.listLayers("*GDBAnno"):
                map_obj.removeLayer(stale_layer)
            label_layer.showLabels = False
            # 2. Add the stored annotation, which must already exist.
            if not arcpy.Exists(anno_dataset):
                raise FileNotFoundError(f"注记图层不存在: {anno_dataset}")
            added_layer = map_obj.addDataFromPath(anno_dataset)
            map_obj.moveLayer(label_layer, added_layer)
            print_status(f" 成功添加注记图层: {added_layer.name}")
        except Exception as e:
            print_status(f"执行标注转注记工具时出错: {str(e)}")
            raise Exception(f"执行标注转注记工具时出错: {str(e)}")
    except Exception as e:
        print_status(f"标注转注记失败: {str(e)}")
        # Also report the full traceback.
        import traceback
        print_status(traceback.format_exc())
def label_to_annotation(map_scale, map_obj, label_layer,layer_name, new_data_source):
    """Convert the labels of ``label_layer`` to geodatabase annotation.

    Steps: enable labels, remove existing ``*GDBAnno`` layers from the map,
    delete a previous ``<layer_name>_GDBAnno`` dataset in
    ``new_data_source`` (retrying while it is locked), run
    ``ConvertLabelsToAnnotation`` at ``map_scale``, then add the new
    annotation to the map and switch the labels off.

    Nothing is raised: failures are logged through ``print_status`` together
    with the traceback.

    Notes:
        Per the original author, labels are cached and bound to
        ``layer.name``, so the caller is expected to rename the layer first.
    """
    try:
        if label_layer is None:
            print_status(f"图层对象为空,无法进行标注转注记")
            return
        # Labels must be on for the conversion to produce anything.
        if label_layer.supports("SHOWLABELS"):
            try:
                label_layer.showLabels = True
            except Exception as e:
                print_status(f"启用标注失败: {str(e)}")
        anno_layer_name = f"{layer_name}_GDBAnno"
        output_anno = os.path.join(new_data_source, anno_layer_name)
        try:
            # 1. Remove every annotation layer currently in the map.
            for lyr in map_obj.listLayers("*GDBAnno"):
                map_obj.removeLayer(lyr)
            if arcpy.Exists(output_anno):
                # 2. Release workspace locks before deleting.
                arcpy.management.ClearWorkspaceCache(new_data_source)
                # 3. Delete with retries while error 000464 is reported.
                max_retries = 5
                for attempt in range(max_retries):
                    try:
                        arcpy.management.Delete(output_anno)
                        # Stop after the first success: without this the loop
                        # deleted again, and the repeat attempt on the now
                        # missing dataset failed the whole conversion.
                        break
                    except arcpy.ExecuteError as e:
                        if "000464" in str(e) and attempt < max_retries - 1:
                            time.sleep((attempt + 1) * 3)
                            arcpy.management.ClearWorkspaceCache(new_data_source)
                            continue
                        raise
                # 4. Verify the old dataset is really gone.
                if arcpy.Exists(output_anno):
                    raise RuntimeError("无法彻底删除旧注记要素类")
        except Exception as e:
            raise Exception(f"清除旧注记失败,{str(e)}")
        try:
            arcpy.cartography.ConvertLabelsToAnnotation(
                input_map=map_obj,
                conversion_scale=map_scale,
                output_geodatabase=new_data_source,
                anno_suffix="_GDBAnno",
                extent="DEFAULT",
                output_group_layer=f"{layer_name}GDBAnno",
                which_layers="SINGLE_LAYER",
                single_layer=label_layer
            )
            print_status(f"标注转注记成功")
            # The annotation replaces the dynamic labels.
            label_layer.showLabels = False
            if arcpy.Exists(output_anno):
                new_anno_layer = map_obj.addDataFromPath(output_anno)
                map_obj.moveLayer(label_layer, new_anno_layer)
                print_status(f" 成功添加注记图层: {new_anno_layer.name}")
            else:
                raise FileNotFoundError(f"注记图层不存在: {output_anno}")
        except Exception as e:
            print_status(f"执行标注转注记工具时出错: {str(e)}")
            raise Exception(f"执行标注转注记工具时出错: {str(e)}")
    except Exception as e:
        print_status(f"标注转注记失败: {str(e)}")
        # Also report the full traceback.
        import traceback
        print_status(traceback.format_exc())
def export_map(params):
    """Create one ArcGIS Pro project file per layer from a template project.

    Args:
        params (dict): required keys ``export_config``, ``county_name``,
            ``template_aprx_file``, ``output_path``, ``data_source_path``,
            ``symbol_path`` and ``polygon_list``; optional
            ``force_regenerate`` (default False) and ``pic_path``.

    Returns:
        int: number of layers whose project file exists afterwards (newly
        saved, or already present and reused). 0 when the run fails as a
        whole.

    For every layer name the template's "属性图" layer is renamed, re-sourced
    and re-symbolised, the layout texts and statistics picture are updated,
    an existing annotation is added, and a copy of the project is saved into
    ``<output_path>/<county_name>_工作空间``. A failure in one layer is
    logged and the next layer is processed.
    """
    start_time = time.time()
    aprx = None
    target_layout = None
    target_map = None
    target_layer = None
    # Captured before anything can fail, so `finally` can always restore it
    # (the original bound it mid-way and hit an unbound name on early errors).
    original_workspace = arcpy.env.workspace
    try:
        export_config = params['export_config']
        county_name = params['county_name']
        template_aprx_file = params['template_aprx_file']
        output_path = params['output_path']
        data_source_path = params['data_source_path']
        symbol_path = params['symbol_path']
        force_regenerate = params.get('force_regenerate', False)
        polygon_list = params['polygon_list']
        pic_path = params.get('pic_path', None)
        success_count = 0
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        # Workspace folder that receives the generated project files.
        workspace_path = os.path.join(output_path, f"{county_name}_工作空间")
        if not os.path.exists(workspace_path) or force_regenerate:
            if os.path.exists(workspace_path):
                # NOTE(review): the message announces a deletion, but nothing
                # is deleted here; existing files are overwritten later.
                print_status(f"强制重新生成,删除现有工作空间:{workspace_path}")
            os.makedirs(workspace_path, exist_ok=True)
            print_status(f"创建工作空间:{workspace_path}")
        if not arcpy.Exists(template_aprx_file):
            raise Exception("模板文件不存在")
        arcpy.env.workspace = workspace_path
        aprx = arcpy.mp.ArcGISProject(template_aprx_file)  # type: ignore

        # Look each template object up before indexing, so a missing one
        # gives the intended message instead of "list index out of range".
        layout_name = "属性图模板"
        layouts = aprx.listLayouts(layout_name)
        if not layouts:
            raise Exception(f"未找到布局: {layout_name}")
        target_layout = layouts[0]

        map_frames = target_layout.listElements("MAPFRAME_ELEMENT", "地图框")
        if not map_frames:
            raise Exception("地图框元素不存在")
        map_scale = map_frames[0].camera.scale

        map_name = "土壤属性图层"
        maps = aprx.listMaps(map_name)
        if not maps:
            raise Exception(f"未找到地图: {map_name}")
        target_map = maps[0]

        target_layer_name = "属性图"
        try:
            layers = target_map.listLayers(target_layer_name)
            if not layers:
                raise Exception(f"未找到图层: {target_layer_name}")
            target_layer = layers[0]
        except Exception as e:
            raise Exception(f"错误信息: {str(e)}") from e

        for layer_name in polygon_list:
            print_status(f"===== 开始处理要素: {layer_name} =====")
            config_key = common_utils.get_config_key(layer_name)
            try:
                target_layer.name = config_key
                single_export_config = export_config.get(config_key, {})
                if not single_export_config:
                    print_status(f"警告: 未找到 {layer_name} 的配置信息, 将处理下一个")
                    continue
                # The file name is the second line of the project title.
                temp_file_name = single_export_config['项目名称'].split('\n')[1]
                file_name = temp_file_name.replace('{区县占位符}', county_name)
                aprx_output_path = os.path.join(workspace_path, f"{file_name}.aprx")
                if os.path.exists(aprx_output_path) and not force_regenerate:
                    print_status(f"工程文件已存在: {file_name}.aprx, 将直接使用")
                    success_count += 1
                    continue
                data_layer_path = os.path.join(data_source_path, layer_name)
                if not arcpy.Exists(data_layer_path):
                    print_status(f"警告: 数据源不存在: {layer_name}要素,跳过此图层")
                    continue
                if data_source_path:
                    try:
                        update_data_source(target_layer, layer_name, data_source_path)
                    except Exception as e:
                        print_status(f"更新数据源时出错: {str(e)},跳过此图层")
                        continue
                if os.path.exists(symbol_path):
                    try:
                        update_symbol_system(target_layer, config_key, symbol_path)
                    except Exception as e:
                        print_status(f"更新符号系统时出错: {str(e)},但将继续处理")
                if target_layout:
                    try:
                        update_text_elements(target_layout, single_export_config, county_name)
                    except Exception as e:
                        print_status(f"更新文本元素时出错: {str(e)},但将继续处理")
                # Replace the statistics picture with this layer's image.
                try:
                    # Reset per layer: the original kept the previous layer's
                    # picture (or hit an unbound name) when nothing matched,
                    # and os.listdir(None) silently listed the working dir.
                    pic_file = None
                    if pic_path and os.path.isdir(pic_path):
                        for pic in os.listdir(pic_path):
                            if pic.endswith(".jpg") and pic.startswith(config_key):
                                pic_file = os.path.join(pic_path, pic)
                                break
                    if pic_file is None:
                        raise FileNotFoundError(f"未找到 {config_key} 对应的统计图片")
                    pic_element = target_layout.listElements("PICTURE_ELEMENT", "统计图片")[0]
                    pic_element.sourceImage = pic_file
                    print_status(f"图片替换成功,{pic_file}")
                except Exception as e:
                    print_status(f"替换图片时出错: {str(e)},但将继续处理")
                # Add the stored annotation for this layer, if any.
                try:
                    add_annotation(map_scale, target_map, target_layer, config_key, data_source_path)
                except Exception as e:
                    print_status(f"添加注记失败: {str(e)},但将继续处理")
                aprx.saveACopy(aprx_output_path)
                print_status(f"成功保存工程文件: {aprx_output_path}")
                success_count += 1
                arcpy.management.ClearWorkspaceCache()
            except Exception as e:
                print_status(f"处理图层 {config_key} 时出错: {str(e)}")
                continue
        print_status(f"===== 导出处理完成 =====")
        elapsed_time = time.time() - start_time
        print_status(f"共处理 {len(polygon_list)} 个图层,成功 {success_count} 个,耗时:{elapsed_time:.2f}")
        return success_count
    except Exception as e:
        print_status(f"导出过程中出错:{str(e)}")
        import traceback
        print_status(traceback.format_exc())
        return 0
    finally:
        # Always restore the environment and drop the ArcPy references.
        arcpy.management.ClearWorkspaceCache()
        arcpy.env.workspace = original_workspace
        target_layer = None
        target_map = None
        target_layout = None
        aprx = None
def main():
    """Entry point: load the export configuration and run ``export_map``.

    Returns:
        int: 0 when at least one layer was exported, otherwise 1 (also on
        any error, which is logged together with its traceback).
    """
    try:
        args = parse_arguments()
        with open(args.config_file, 'r', encoding='utf-8') as config_handle:
            config = json.load(config_handle)
        # Only the "export_config" section of the file is needed.
        exported = export_map({
            "export_config": config["export_config"],
            "county_name": args.county_name,
            "polygon_list": args.polygon_list,
            "template_aprx_file": args.template_aprx_file,
            "output_path": args.output_path,
            "data_source_path": args.data_source_path,
            "symbol_path": args.symbol_path,
            "force_regenerate": args.force_regenerate,
            "pic_path": args.pic_path
        })
        if exported > 0:
            return 0
        print_status("没有导出任何图层")
        return 1
    except Exception as e:
        print_status(f"错误:{str(e)}")
        import traceback
        print_status(traceback.format_exc())
        return 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,475 @@
# -*- coding: utf-8 -*-
"""
栅格处理模块: 提供栅格重分类、栅格转矢量和小面积图斑消除功能
设计用于通过 QProcess 调用,接收命令行参数,并通过标准输出返回结果和状态。
"""
import argparse
import json
import os
import random
import sys
import time
import traceback
import arcpy
import uuid
from pathlib import Path
from tools.core.utils.os_utils import temp_files_processor
try:
from utils import common_utils
except ImportError:
print("错误: 未找到 utils 模块。请确保 utils.py 文件存在或已添加到 PYTHONPATH。")
sys.exit(1)
def parse_arguments():
    """Parse the command line and build the raster processing settings.

    Options: ``--input_raster`` and ``--settings_path`` (both required).

    The ``raster_settings`` section of the settings file is returned, with
    two entries added:

    * ``remap_table``: built by ``common_utils.create_remap_table`` from the
      "标准等级" entry that the standards file (``config_file_path``) holds
      for the raster's base name under ``export_config``;
    * ``input_raster``: the raster path from the command line.

    Returns:
        dict: the completed raster settings.

    Exits with status 1 when no settings path is given or the settings file
    has no (or an empty) ``raster_settings`` section.
    """
    parser = argparse.ArgumentParser(description='处理栅格数据:重分类、转矢量、消除小图斑')
    parser.add_argument('--input_raster', required=True, help='输入栅格文件路径')
    parser.add_argument('--settings_path', required=True, help='配置文件路径')
    args = parser.parse_args()

    raster_settings = {}
    if args.settings_path:
        with open(args.settings_path, 'r', encoding="utf-8") as settings_file:
            settings = json.load(settings_file)
        raster_settings = settings.get("raster_settings", {})
    if not raster_settings:
        # Report a missing/empty section here rather than handing callers an
        # empty dict that fails later on a missing key.
        print("错误: 未找到有效配置文件")
        sys.exit(1)

    standards_dict_path = raster_settings.get("config_file_path", "")
    with open(standards_dict_path, 'r', encoding="utf-8") as standards_file:
        standards_dict = json.load(standards_file)
    # The raster's base name selects its entry in the standards file.
    raster_name = Path(args.input_raster).stem
    remap_table = common_utils.create_remap_table(standards_dict['export_config'][raster_name]["标准等级"])
    raster_settings["remap_table"] = remap_table
    raster_settings["input_raster"] = args.input_raster
    return raster_settings
def print_status(message):
    """Report progress to the GUI via stdout, flushing immediately.

    The emitted line is ``STATUS:<message>`` (no space after the colon).
    """
    out = sys.stdout
    print(f"STATUS:{message}", file=out)
    out.flush()  # the GUI reads stdout live, so nothing may stay buffered
def print_result(success, output_path="", error_message=""):
    """Report the final task result to the GUI via stdout and flush.

    Output is ``RESULT:True|<output_path>|`` on success and
    ``RESULT:False||<error_message>`` on failure. The failure text is
    flattened to one line first (newline -> space, carriage return removed).
    """
    if success:
        result_line = f"RESULT:True|{output_path}|"
    else:
        flat_message = error_message.replace('\n', ' ').replace('\r', '')
        result_line = f"RESULT:False||{flat_message}"
    print(result_line)
    sys.stdout.flush()
def log_arcpy_message(message):
    """Relay one ArcPy geoprocessing message to the status output.

    ``message`` must provide ``type`` and ``message`` attributes. The types
    ``'Message'``, ``'Warning'`` and ``'Error'`` are passed to
    ``print_status``; an error is also written to stderr. Other types are
    ignored.
    """
    msg_type = message.type
    if msg_type == 'Message':
        print_status(f"ArcPy消息: {message.message}")
        return
    if msg_type == 'Warning':
        print_status(f"ArcPy警告: {message.message}")
        return
    if msg_type != 'Error':
        return
    print_status(f"ArcPy错误: {message.message}")
    # Errors go to stderr as well.
    sys.stderr.write(f"ArcPyError:{message.message}\n")
    sys.stderr.flush()
# --- 核心处理函数 ---
def _normalize_remap_item(item):
    """Validate one remap entry; return ``[from, to, new]`` or None to skip it."""
    if len(item) != 3:
        print_status(f"警告: 跳过无效的重分类项格式: {item}")
        return None
    from_value, to_value, new_value = item
    try:
        from_value = float(from_value)
    except (ValueError, TypeError):
        print_status(f"警告: 跳过无效的重分类项起始值: {item[0]}")
        return None
    try:
        to_value = float(to_value)
    except (ValueError, TypeError):
        print_status(f"警告: 跳过无效的重分类项结束值: {item[1]}")
        return None
    # Replace an infinite upper bound by a large finite stand-in. Testing
    # after the float conversion also covers bounds given as strings such as
    # "inf", which the original type check let through unclamped.
    if to_value == float('inf'):
        to_value = 10000
    elif to_value == float('-inf'):
        to_value = -10000
    try:
        new_value = int(new_value)
    except (ValueError, TypeError):
        print_status(f"警告: 跳过无效的重分类项新值: {item[2]}")
        return None
    if from_value > to_value:
        print_status(f"警告: 跳过无效范围: {from_value} > {to_value}")
        return None
    return [from_value, to_value, new_value]


def reclassify_raster(input_raster, remap_table, temp_files_to_clean):
    """Reclassify a raster by value ranges and save the result in memory.

    Args:
        input_raster (str): path of the input raster.
        remap_table (list): entries ``[from, to, new_value]``. Bounds are
            converted to float and ``new_value`` to int; invalid entries are
            skipped with a warning.
        temp_files_to_clean (list): receives the path of the in-memory result
            so that the caller can delete it later.

    Returns:
        str: path of the reclassified raster in the ``in_memory`` workspace.

    Raises:
        ValueError: when no valid remap entry remains.
        FileNotFoundError: when ``input_raster`` does not exist.
        Exception: any ArcPy failure; it is logged with the ArcPy error
            messages and re-raised.
    """
    print_status(f"开始重分类栅格: {input_raster}")
    try:
        corrected_remap_table = []
        for item in remap_table:
            try:
                normalized = _normalize_remap_item(item)
            except Exception as e:
                # E.g. an entry that is not a sequence at all.
                print_status(f"处理重分类项 {item} 时出错: {e}")
                continue
            if normalized is not None:
                corrected_remap_table.append(normalized)
        if not corrected_remap_table:
            raise ValueError("重分类映射表为空或无效,无法执行重分类。")
        remap = arcpy.sa.RemapRange(corrected_remap_table)
        if not arcpy.Exists(input_raster):
            raise FileNotFoundError(f"输入栅格不存在: {input_raster}")
        out_raster = arcpy.sa.Reclassify(input_raster, "VALUE", remap, "DATA")
        # A random suffix keeps concurrent or repeated runs from colliding.
        temp_reclass_raster_mem = f"in_memory/reclass_int_{uuid.uuid4().hex[:8]}"
        out_raster.save(temp_reclass_raster_mem)
        temp_files_to_clean.append(temp_reclass_raster_mem)
        print_status(f"重分类已完成: {temp_reclass_raster_mem}")
        return temp_reclass_raster_mem
    except Exception as e:
        print_status(f"重分类过程出错: {str(e)}")
        # Severity 2 selects ArcPy's error messages.
        for msg in arcpy.GetMessages(2).split('\n'):
            if msg:
                print_status(f"ArcPy重分类错误详情: {msg}")
        raise
def eliminate_small_polygons(input_polygon, output_polygon, min_area, area_unit, temp_files_to_clean, current_iter=1, max_iterations=8, start_area=1000):
    """
    Recursively merge small polygons into their neighbours.

    The area threshold grows with every pass: pass ``k`` uses
    ``start_area * 2 ** (k - 1)``, capped at ``min_area``. Each pass selects
    the polygons below the current threshold, runs Eliminate on them and
    dissolves the result on the ``gridcode`` field. Recursion stops when the
    final threshold (``min_area``) has been reached and nothing is selected
    any more, or when ``max_iterations`` passes have been made; the current
    input is then copied to ``output_polygon``.

    Args:
        input_polygon (str): Path of the polygon feature class to process.
        output_polygon (str): Path of the final output feature class.
        min_area (float): Final minimum-area threshold.
        area_unit (str): Area unit passed to CalculateGeometryAttributes.
        temp_files_to_clean (list): Collector for temporary dataset paths;
            intermediate outputs are appended to it.
        current_iter (int): Number of the current pass (1-based).
        max_iterations (int): Maximum number of passes.
        start_area (float): Threshold used by the first pass.

    Returns:
        str: ``output_polygon``.

    Raises:
        FileNotFoundError: If ``input_polygon`` does not exist.
        Exception: Any error raised by the geoprocessing tools is logged and
            re-raised.
    """
    if not arcpy.Exists(input_polygon):
        raise FileNotFoundError(f"输入多边形不存在: {input_polygon}")

    temp_layer = None
    try:
        # On the first pass remove a stale output so the final copy cannot collide with it.
        if current_iter == 1 and arcpy.Exists(output_polygon):
            try:
                arcpy.management.Delete(output_polygon)
                print_status(f"已删除现有输出文件: {output_polygon}")
            except Exception as delete_err:
                print_status(f"警告: 无法删除现有输出文件 {output_polygon}: {str(delete_err)}")

        # Progressive threshold: doubles every pass, never above the final threshold.
        current_threshold = min(start_area * (2 ** (current_iter - 1)), min_area)

        # Eliminate works on a layer selection, so wrap the input in a layer.
        temp_layer = f"input_lyr_{uuid.uuid4().hex[:8]}"
        arcpy.management.MakeFeatureLayer(input_polygon, temp_layer)

        # Make sure the helper area field exists, then (re)compute it.
        area_field_name = "TEMP_AREA"
        existing_fields = [f.name for f in arcpy.ListFields(temp_layer)]
        if area_field_name not in existing_fields:
            arcpy.management.AddField(temp_layer, area_field_name, "DOUBLE")
        arcpy.management.CalculateGeometryAttributes(
            temp_layer,
            [[area_field_name, "AREA"]],
            None,
            area_unit,
            None,
            "SAME_AS_INPUT"
        )

        # Select the polygons below the threshold of this pass.
        selection_query = f"{arcpy.AddFieldDelimiters(temp_layer, area_field_name)} < {current_threshold}"
        arcpy.management.SelectLayerByAttribute(temp_layer, "NEW_SELECTION", selection_query)
        count = int(arcpy.management.GetCount(temp_layer).getOutput(0))

        # Stop only when the *final* threshold has been checked and found clean
        # (or the pass budget is used up). An empty selection at a smaller,
        # intermediate threshold says nothing about polygons between that
        # threshold and min_area, so it must not end the process.
        final_threshold_reached = current_threshold >= min_area
        if current_iter >= max_iterations or (count == 0 and final_threshold_reached):
            arcpy.management.CopyFeatures(input_polygon, output_polygon)
            return output_polygon

        if count == 0:
            # Nothing to merge at this threshold; retry the same data with the next one.
            next_input = input_polygon
        else:
            temp_eliminate_output = f"in_memory/temp_eliminate_{uuid.uuid4().hex[:8]}"
            temp_files_to_clean.append(temp_eliminate_output)
            arcpy.Eliminate_management(temp_layer, temp_eliminate_output, "LENGTH")

            # Dissolve neighbours that now share the same gridcode into single-part polygons.
            temp_dissolve_output = f"in_memory/dissolve_polygons_{uuid.uuid4().hex[:8]}"
            temp_files_to_clean.append(temp_dissolve_output)
            dissolve_fields = ["gridcode"]
            arcpy.management.Dissolve(temp_eliminate_output, temp_dissolve_output, dissolve_fields, multi_part="SINGLE_PART")
            next_input = temp_dissolve_output

        return eliminate_small_polygons(
            next_input,
            output_polygon,
            min_area,
            area_unit,
            temp_files_to_clean,
            current_iter + 1,
            max_iterations,
            start_area
        )
    except Exception as e:
        print_status(f"消除小面积多边形过程出错 (迭代 {current_iter}): {str(e)}")
        # Forward the ArcPy error messages line by line.
        for msg in arcpy.GetMessages(2).split('\n'):
            if msg:
                print_status(f"ArcPy消除错误详情: {msg}")
        raise
    finally:
        # Always drop the temporary layer of this pass, even after an error.
        if temp_layer is not None and arcpy.Exists(temp_layer):
            try:
                arcpy.management.Delete(temp_layer)
            except Exception as delete_layer_err:
                print_status(f"警告: 无法删除临时图层 {temp_layer}: {str(delete_layer_err)}")
# --- 主处理逻辑 ---
def main():
    """Run the raster post-processing pipeline for one input raster.

    Steps: parse arguments, set the ArcPy workspace, validate paths,
    optionally reclassify the raster, convert it to polygons, optionally clip,
    split multipart features, eliminate small polygons and report the result
    through ``print_result``.

    All errors are caught and reported via ``print_status`` / ``print_result``;
    the ``finally`` clause always cleans up temporary data and then calls
    ``sys.exit(0)``, so the process exit code is 0 on success and on failure.
    """
    temp_files_to_clean = []
    original_workspace = None
    try:
        # 1. Parse arguments
        params = parse_arguments()
        input_raster = params["input_raster"]
        raster_name = Path(input_raster).stem
        input_folder = params["input_folder"]
        output_folder = params["batch_output_folder"]
        clip_features = params["clip_features"]
        clip_enabled = params["clip_enabled"]
        remap_table = params["remap_table"]
        min_area = params["min_area"]
        area_unit = params["area_unit"]
        simplify = params["simplify"]

        # 2. Workspace and environment
        original_workspace = arcpy.env.workspace
        arcpy.env.workspace = input_folder
        arcpy.env.overwriteOutput = True
        print_status(f"ArcPy 工作空间设置为: {arcpy.env.workspace}")

        # 3. Validate input / output paths
        if not arcpy.Exists(input_raster):
            raise FileNotFoundError(f"输入栅格文件不存在: {input_raster}")

        # exist_ok=True keeps the creation safe when another process creates
        # the same folder between the existence check and makedirs.
        if not os.path.exists(output_folder):
            os.makedirs(output_folder, exist_ok=True)
            print_status(f"已创建输出文件夹: {output_folder}")

        # Folder for the polygons used by the area statistics
        disk_output_path = os.path.join(output_folder, "面积统计用栅格面")
        # NOTE(review): random delay kept from the original; presumably it
        # staggers parallel workers - confirm whether it is still needed.
        time.sleep(random.random())
        if not os.path.exists(disk_output_path):
            os.makedirs(disk_output_path, exist_ok=True)
            print_status(f"已创建面积统计文件夹: {disk_output_path}")

        if clip_enabled and not arcpy.Exists(clip_features):
            raise FileNotFoundError(f"裁剪要素类不存在: {clip_features}")

        # 4. Final output path. When the output location is a workspace the
        # shapefile is written next to it (into its parent directory).
        output_is_workspace = common_utils.get_data_type(output_folder) in ["Workspace", "FeatureDataset", "Geodatabase"]
        if output_is_workspace:
            final_output_path = os.path.join(os.path.dirname(output_folder), f"{raster_name}_eliminate.shp")
        else:
            final_output_path = os.path.join(output_folder, f"{raster_name}_eliminate.shp")

        # 5. Reclassify (only when a remap table is supplied)
        if remap_table:
            reclassed_raster = reclassify_raster(input_raster, remap_table, temp_files_to_clean)
            arcpy.Raster(reclassed_raster).save(os.path.join(output_folder, f"{raster_name}分级后.tif"))
        else:
            # Without a remap table the input raster is polygonized as-is;
            # previously this path failed with an undefined variable.
            # RasterToPolygon needs an integer raster, so a non-integer input
            # is reported through the ArcPy error handler below.
            print_status("未提供重分类映射表,直接使用输入栅格进行转面")
            reclassed_raster = input_raster

        # 6. Raster to polygon
        print_status(f"开始栅格转多边形: {reclassed_raster}")
        temp_polygon_output = os.path.join("in_memory", f"raster_to_polygon_{uuid.uuid4().hex[:8]}")
        temp_files_to_clean.append(temp_polygon_output)
        simplify_value = "SIMPLIFY" if simplify else "NO_SIMPLIFY"
        arcpy.conversion.RasterToPolygon(reclassed_raster, temp_polygon_output, simplify_value, "VALUE")
        print_status(f"栅格转多边形完成,结果在内存: {temp_polygon_output}")

        # Persist the un-eliminated polygons for the area statistics.
        disk_output_polygon = os.path.join(disk_output_path, f"{raster_name}_reclassed_polygon.shp")
        arcpy.CopyFeatures_management(temp_polygon_output, disk_output_polygon)
        print_status(f"已将重分类转面结果保存到硬盘: {disk_output_polygon}")

        # 7. Clip (optional)
        current_polygon_source = temp_polygon_output
        if clip_enabled:
            print_status(f"开始裁剪要素类: {current_polygon_source} using {clip_features}")
            temp_cliped_polygon_output = os.path.join("in_memory", f"cliped_{uuid.uuid4().hex[:8]}")
            temp_files_to_clean.append(temp_cliped_polygon_output)
            arcpy.analysis.Clip(current_polygon_source, clip_features, temp_cliped_polygon_output)
            current_polygon_source = temp_cliped_polygon_output
            print_status(f"裁剪完成,结果在内存: {current_polygon_source}")

        # Multipart to singlepart, so every feature is an independent polygon.
        print_status(f"开始多部件至单部件转换: {current_polygon_source}")
        temp_multi_to_single_output = os.path.join("in_memory", f"single_{uuid.uuid4().hex[:8]}")
        temp_files_to_clean.append(temp_multi_to_single_output)
        arcpy.management.MultipartToSinglepart(current_polygon_source, temp_multi_to_single_output)
        current_polygon_source = temp_multi_to_single_output
        print_status(f"多部件至单部件转换完成,结果在内存: {current_polygon_source}")

        # 8. Eliminate small polygons (writes the final output itself)
        print_status(f"开始消除小面积图斑: {current_polygon_source} (阈值: {min_area} {area_unit})")
        eliminate_small_polygons(
            current_polygon_source,
            final_output_path,
            min_area,
            area_unit,
            temp_files_to_clean
        )
        print_status("消除小面积图斑完成.")

        # 9. Clean up intermediates and report the result
        print_status("处理流程全部完成.")
        temp_files_processor.clean_up_temp_files(temp_files=temp_files_to_clean, workspace=original_workspace)
        if arcpy.Exists(final_output_path):
            print_result(True, final_output_path, "")
        else:
            raise Exception(f"处理完成,但最终输出文件 {final_output_path} 不存在。")
    except FileNotFoundError as fnf_e:
        error_msg = f"文件不存在错误: {str(fnf_e)}"
        print_status(error_msg)
        print_result(False, "", error_msg)
    except ValueError as ve:
        error_msg = f"参数错误或数据校验失败: {str(ve)}"
        print_status(error_msg)
        print_result(False, "", error_msg)
    except arcpy.ExecuteError:
        # Geoprocessing tool failure: forward the ArcPy error messages.
        error_msg = f"ArcPy 执行错误: {arcpy.GetMessages(2)}"
        print_status(error_msg)
        sys.stderr.write(f"ArcPyExecuteError:{arcpy.GetMessages(2)}\n")
        print_result(False, "", error_msg)
    except Exception as e:
        # Anything unexpected: report with the full traceback.
        error_msg = f"发生未预料的错误: {str(e)}\n{traceback.format_exc()}"
        print_status(error_msg)
        sys.stderr.write(f"UnexpectedError:{error_msg}\n")
        print_result(False, "", error_msg)
    finally:
        # The error handlers above do not clean up, so this is the only
        # clean-up on failure paths (and a harmless repeat on success).
        print_status("脚本结束,执行最终清理...")
        temp_files_processor.clean_up_temp_files(temp_files=temp_files_to_clean, workspace=original_workspace)
        print_status("最终清理完成.")
        # Exit code is always 0; success or failure is conveyed by print_result.
        sys.exit(0)
# Script entry point: log the start-up message, then run the pipeline.
# main() ends the process itself through sys.exit(0) in its finally clause.
if __name__ == "__main__":
    print_status("脚本开始执行...")
    main()

View File

@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
# Display order of the five soil-texture groups; process_data_for_table1 uses
# this list as the ordered categories of the "属性分级" column before sorting.
trzd5_order = ["砂质", "砂壤质", "壤质", "黏壤质", "黏质"]
# Names of the twelve detailed soil-texture classes.
# NOTE(review): no use of this list is visible in this part of the file.
trzd12_order = ["砂土及壤质砂土", "砂质壤土", "壤土", "粉砂质壤土", "砂质黏壤土", "黏壤土", "粉砂质黏壤土", "砂质黏土", "壤质黏土", "粉砂质黏土", "黏土", "重黏土"]
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """
    Classify a merged-cell range by the direction(s) it spans.

    Returns 'row' when the range covers several rows of one column,
    'column' when it covers several columns of one row, 'both' when it covers
    several rows and columns, and None for a falsy argument or a range of a
    single cell.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    # (spans_rows, spans_cols) -> label; the single-cell case maps to None.
    labels = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    return labels.get((bool(spans_rows), bool(spans_cols)))
# 计算属性等级
# Twelve-class soil texture (grid code or class name) -> five-class texture group.
_TEXTURE12_TO_GROUP = {
    "8": "砂质", "砂土及壤质砂土": "砂质",
    "11": "砂壤质", "砂质壤土": "砂壤质",
    "6": "壤质", "3": "壤质", "粉砂质壤土": "壤质", "壤土": "壤质",
    "1": "黏壤质", "4": "黏壤质", "9": "黏壤质",
    "粉砂质黏壤土": "黏壤质", "黏壤土": "黏壤质", "砂质黏壤土": "黏壤质",
    "2": "黏质", "5": "黏质", "7": "黏质", "10": "黏质", "12": "黏质",
    "粉砂质黏土": "黏质", "黏土": "黏质", "壤质黏土": "黏质", "砂质黏土": "黏质", "重黏土": "黏质",
}


def get_prop_level(prop_level):
    """Map a twelve-class soil-texture code or name to its five-class group.

    Args:
        prop_level: Grid code (int, integral float or numeric string) or the
            Chinese name of a twelve-class soil texture.

    Returns:
        str: One of "砂质", "砂壤质", "壤质", "黏壤质", "黏质"; "-" for missing
        values, code 0 and anything that is not a known code or name.
    """
    if pd.isna(prop_level):
        return "-"
    # An integral float such as 8.0 would stringify as "8.0" and miss the
    # table, so normalise it to the plain integer first.
    if isinstance(prop_level, float) and prop_level.is_integer():
        prop_level = int(prop_level)
    return _TEXTURE12_TO_GROUP.get(str(prop_level), "-")
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """
    Return a copy of ``df`` extended with grading columns.

    When ``level_config`` is truthy and ``target_prop`` is a column of ``df``,
    a "GRIDCODE" column is filled by ``common_utils.vectorized_grade_assignment``
    from the values of ``target_prop`` and the "标准等级" entry of the config.
    A "YJDL" column holding the first two characters of "TDLYLX" is always
    added. The input frame itself is not modified.
    """
    graded = df.copy()

    can_grade = bool(level_config) and target_prop in df.columns
    if can_grade:
        standards = level_config["标准等级"]
        values = df[target_prop].values
        # Vectorized assignment of the grade code for every row.
        graded["GRIDCODE"] = common_utils.vectorized_grade_assignment(values, standards)

    # First-level land class = first two characters of the land-use code.
    graded['YJDL'] = graded['TDLYLX'].str[:2]
    return graded
# --- 3. 数据处理与分析 均值---
def _apply_area_adjustment(df_map_data, target_areas_dict, xzqmc, is_by_xzq):
    """Scale ``temp_area`` in place so land-class totals match the target areas."""
    if is_by_xzq:
        df_map_data['adjusted_area'] = df_map_data['temp_area']
        df_map_data['adjustment_factor'] = 1.0

        # Districts present in the data but absent from the targets are only reported.
        missing_districts = [
            district for district in df_map_data['XZQMC'].unique()
            if district not in target_areas_dict
        ]
        if missing_districts:
            print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")

        # Mapped totals per (district, land class), taken before any scaling.
        original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
        for district, landuse_targets in target_areas_dict.items():
            for yjdl, target_area in landuse_targets.items():
                key = (district, yjdl)
                if key in original_totals.index and original_totals[key] > 0:
                    adjustment_factor = target_area / original_totals[key]
                    mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                    df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
        return

    # Single-region mode: one target per land class, looked up by region name.
    original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
    landuse_targets = target_areas_dict.get(xzqmc) or {}
    for yjdl, target_area in landuse_targets.items():
        # A land class without mapped area cannot be scaled; skipping it avoids
        # the KeyError / division by zero that used to abort the whole loop.
        original_total = original_totals.get(yjdl, 0)
        if original_total > 0:
            adjustment_factor = target_area / original_total
            mask = df_map_data['YJDL'] == yjdl
            df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
            df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor


def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """
    Build the per-class statistics table (sample counts and mapped areas).

    Args:
        gdb_path: Geodatabase containing the sample feature class.
        soil_prop_feature_name: Name used both for the sample feature class
            and for the property field read from it.
        df_origin_area: Intersection table with the columns "XZQMC",
            "YJDL_EJDL", "GRIDCODE" and "temp_area". A "YJDL" column is added
            to it in place.
        target_areas_dict: ``{region name: {land class: target area}}`` used
            to scale the mapped areas.
        xzqmc: Region name used to pick the targets when ``is_by_xzq`` is False.
        is_by_xzq: True to adjust every district found in ``target_areas_dict``
            separately; False to adjust by land class only.
        prop_config: Grading configuration forwarded to
            ``process_soil_dataframe``.

    Returns:
        pandas.DataFrame: One row per ("属性分级", "GRIDCODE") with the columns
        "样点数", "样点数占比", "制图面积", "制图面积_平差后" and "面积占比",
        sorted by class order. A failing area adjustment is printed and the
        statistics are built from whatever areas are present at that point.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # Normalise to stripped strings, turn null markers into NaN and drop those rows.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: count per class ---
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(get_prop_level)
    # NOTE(review): the converted column is not read again in this function;
    # the conversion is kept because it rejects non-numeric values - confirm
    # whether that check is intended.
    processed_df[field_name] = processed_df[field_name].astype('float')

    df_sample_means = processed_df.groupby(['属性分级','GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Mapped areas per district / land class / grade ---
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()
    try:
        _apply_area_adjustment(df_map_data, target_areas_dict, xzqmc, is_by_xzq)
    except Exception as e:
        print(f"平差处理失败: {e}")

    # The area column is copied as-is; no unit conversion happens here.
    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级','GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge both statistics on the union of their classes ---
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级','GRIDCODE']],
        df_map_areas[['属性分级','GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级','GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级','GRIDCODE'], how='left')

    # Sort by the fixed five-class order, then by grid code.
    df_final["属性分级"] = pd.Categorical(df_final['属性分级'], categories=trzd5_order, ordered=True)
    df_final.sort_values(['属性分级','GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """
    Write the per-class statistics to a formatted Excel workbook.

    Args:
        df: Table with the columns "属性分级", "GRIDCODE", "样点数",
            "样点数占比", "制图面积_平差后" and "面积占比".
        output_path: Path of the .xlsx file to create.
        prop_config: Grading configuration; its "标准等级" mapping is inverted
            to look up the display name of each grid code.

    An empty ``df`` produces a workbook that only holds a notice. Rows are
    grouped by "属性分级"; the group label is written once in column A and the
    cells of a multi-row group are merged. Groups without rows are skipped.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "行政区酸化程度等级分布及占比"

    # --- a. Styles ---
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤三普分类'
    ws.merge_cells('C1:D1')
    ws['C1'] = '样点统计'
    ws.merge_cells('E1:F1')
    ws['E1'] = '制图统计'
    ws['A2'] = '类别'
    ws['B2'] = '名称'
    ws['C2'] = '数量/个'
    ws['D2'] = '占比%'
    ws['E2'] = '面积/亩'
    ws['F2'] = '占比%'

    # Inverted grading table: grade code -> display name.
    level_dict = prop_config['标准等级']
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. Data rows ---
    current_row = 3
    for yl, group_yl_df in df.groupby('属性分级', sort=False, observed=False):
        # A categorical grouper can yield categories without rows; they
        # must not produce a label or an inverted merge range.
        if group_yl_df.empty:
            continue
        yl_start_row = current_row

        for _, row_data in group_yl_df.iterrows():
            grid_code = row_data['GRIDCODE']
            # Keep the lookup key stable if the code arrives as an integral float.
            if isinstance(grid_code, float) and grid_code.is_integer():
                grid_code = int(grid_code)
            ws.cell(row=current_row, column=2).value = upper_ranges.get(str(grid_code), '-')

            sample_val = row_data.get('样点数', 0)
            sample_pct_val = row_data.get('样点数占比', 0)
            area_val = row_data.get('制图面积_平差后', 0)
            area_pct_val = row_data.get('面积占比', 0)

            # Missing (NaN) or non-positive values are shown as "-".
            ws.cell(row=current_row, column=3).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=4).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=5).value = f"{area_val:.0f}" if area_val > 0 else "-"
            ws.cell(row=current_row, column=6).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"
            current_row += 1

        # Merge the label cells only when the group spans several rows.
        if current_row - yl_start_row > 1:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # Total row
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = df['样点数'].sum()
    ws.cell(row=current_row, column=4).value = '100'
    ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=6).value = '100'

    # --- d. Styles and column widths ---
    apply_style(f'A1:F{current_row}', cell_font, center_align, thin_border)
    apply_style('A1:F2', header_font)

    print("正在自动调整列宽...")
    cjk_char = re.compile('[\u4e00-\u9fa5]')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
                # Horizontally merged cells would over-widen their first column.
                if get_merge_type(merged_range) == 'column':
                    continue
                text = str(cell.value)
                # CJK characters are wider, so they count 1.7 instead of 1.
                cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Create the "<soil_prop_name>土壤分级分布.xlsx" report.

    Intersects the land-use polygons with the reclassified polygons, tabulates
    the intersection per district ("行政区划" in ``gdb_path``), then builds and
    writes the statistics table. Errors are printed with a traceback and not
    re-raised; temporary in-memory datasets are removed in every case.

    Args:
        gdb_path: Geodatabase used as the ArcPy workspace.
        soil_prop_name: Soil property name (sample feature class / field name).
        reclassed_features_path: Polygon feature class of the reclassified raster.
        dltb_features: Land-use polygon feature class.
        output_path: Folder that receives the Excel report.
        target_area_dict: Target areas used for the area adjustment.
        xzqmc: Name of the region being processed.
        prop_config: Grading configuration of the property.
    """
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        # Only these regions are adjusted district by district.
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")

        temp_out_feature_class = r"in_memory/temp_out_feature_class"
        temp_out_tables_area = r"in_memory/temp_out_tables_area"
        # Register both in-memory outputs; the intersect result used to be
        # left behind in the in_memory workspace.
        temp_files.append(temp_out_feature_class)
        temp_files.append(temp_out_tables_area)

        # Intersect land-use polygons with the reclassified polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersection per district.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        # Build the statistics and write the report.
        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,360 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """
    Tell in which direction(s) a merged-cell range extends.

    Returns 'both' for a range spanning several rows and columns, 'row' for
    several rows only, 'column' for several columns only, and None when the
    argument is falsy or the range covers one cell.
    """
    if not merged_range:
        return None

    first_row, last_row = merged_range.min_row, merged_range.max_row
    first_col, last_col = merged_range.min_col, merged_range.max_col
    multi_row = last_row > first_row
    multi_col = last_col > first_col

    if multi_row:
        return 'both' if multi_col else 'row'
    return 'column' if multi_col else None
# 计算属性等级
# Grid code -> five-class soil-texture group.
_TEXTURE_GROUP_BY_CODE = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}
_TEXTURE_GROUP_NAMES = frozenset(_TEXTURE_GROUP_BY_CODE.values())


def get_prop_level(prop_level):
    """Return the five-class soil-texture group for a grid code or group name.

    Args:
        prop_level: Grid code 1-5 (number or numeric string; floats are
            truncated by ``int``) or one of the group names themselves.

    Returns:
        str: The group name, or "-" for missing values, 0 and unknown input.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # Check names first: int() on a name raises, which previously made every
    # name input fail instead of being returned.
    if isinstance(prop_level, str) and prop_level in _TEXTURE_GROUP_NAMES:
        return prop_level
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return _TEXTURE_GROUP_BY_CODE.get(code, "-")
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """
    Copy ``df`` and append the grading columns.

    "GRIDCODE" is added through ``common_utils.vectorized_grade_assignment``
    (using the "标准等级" entry of ``level_config``) only when a config is
    given and ``target_prop`` exists in ``df``. "YJDL", the first two
    characters of "TDLYLX", is added in every case. ``df`` stays untouched.
    """
    out = df.copy()

    if level_config and target_prop in df.columns:
        standards = level_config["标准等级"]
        # Grade all rows in one vectorized call.
        codes = common_utils.vectorized_grade_assignment(df[target_prop].values, standards)
        out["GRIDCODE"] = codes

    land_use_code = out['TDLYLX']
    out['YJDL'] = land_use_code.str[:2]
    return out
# --- 3. 数据处理与分析 均值---
def _apply_area_adjustment(df_map_data, target_areas_dict, xzqmc, is_by_xzq):
    """Scale ``temp_area`` in place so land-class totals match the target areas."""
    if is_by_xzq:
        df_map_data['adjusted_area'] = df_map_data['temp_area']
        df_map_data['adjustment_factor'] = 1.0

        # Districts present in the data but absent from the targets are only reported.
        missing_districts = [
            district for district in df_map_data['XZQMC'].unique()
            if district not in target_areas_dict
        ]
        if missing_districts:
            print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")

        # Mapped totals per (district, land class), taken before any scaling.
        original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
        for district, landuse_targets in target_areas_dict.items():
            for yjdl, target_area in landuse_targets.items():
                key = (district, yjdl)
                if key in original_totals.index and original_totals[key] > 0:
                    adjustment_factor = target_area / original_totals[key]
                    mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                    df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
        return

    # Single-region mode: one target per land class, looked up by region name.
    original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
    landuse_targets = target_areas_dict.get(xzqmc) or {}
    for yjdl, target_area in landuse_targets.items():
        # A land class without mapped area cannot be scaled; skipping it avoids
        # the KeyError / division by zero that used to abort the whole loop.
        original_total = original_totals.get(yjdl, 0)
        if original_total > 0:
            adjustment_factor = target_area / original_total
            mask = df_map_data['YJDL'] == yjdl
            df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
            df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor


def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """
    Build the per-class statistics table (sample counts and mapped areas).

    Args:
        gdb_path: Geodatabase containing the sample feature class.
        soil_prop_feature_name: Name used both for the sample feature class
            and for the field whose values are taken as the class of a sample.
        df_origin_area: Intersection table with the columns "XZQMC",
            "YJDL_EJDL", "GRIDCODE" and "temp_area". A "YJDL" column is added
            to it in place.
        target_areas_dict: ``{region name: {land class: target area}}`` used
            to scale the mapped areas.
        xzqmc: Region name used to pick the targets when ``is_by_xzq`` is False.
        is_by_xzq: True to adjust every district found in ``target_areas_dict``
            separately; False to adjust by land class only.
        prop_config: Not used by this function; kept for interface
            compatibility.

    Returns:
        pandas.DataFrame: One row per "属性分级" with the columns "样点数",
        "样点数占比", "制图面积", "制图面积_平差后" and "面积占比", sorted by
        "属性分级". A failing area adjustment is printed and the statistics are
        built from whatever areas are present at that point.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # Normalise to stripped strings, turn null markers into NaN and drop those rows.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # --- a. Sample points: the field value itself is the class ---
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = df_samples.copy()
    processed_df['属性分级'] = processed_df[field_name]

    df_sample_means = processed_df.groupby(['属性分级']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Mapped areas per district / land class / grade ---
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()
    try:
        _apply_area_adjustment(df_map_data, target_areas_dict, xzqmc, is_by_xzq)
    except Exception as e:
        print(f"平差处理失败: {e}")

    # The area column is copied as-is; no unit conversion happens here.
    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge both statistics on the union of their classes ---
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级']],
        df_map_areas[['属性分级']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级'], how='left')
    df_final.sort_values(['属性分级'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """
    Write the soil-texture class statistics to a formatted Excel workbook.

    Args:
        df: Table with the columns "属性分级", "样点数", "样点数占比",
            "制图面积_平差后" and "面积占比"; one sheet row is written per
            table row, followed by a total row.
        output_path: Path of the .xlsx file to create.
        prop_config: Not used by this writer; kept for interface
            compatibility. The unused lookup of its "标准等级" entry was
            removed, so a missing config no longer aborts the report.

    An empty ``df`` produces a workbook that only holds a notice.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "土壤质地分类分布"

    # --- a. Styles ---
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header ---
    ws.merge_cells('A1:A2')
    ws['A1'] = '土壤质地类别'
    ws.merge_cells('B1:C1')
    ws['B1'] = '样点统计'
    ws.merge_cells('D1:E1')
    ws['D1'] = '制图统计'
    ws['B2'] = '数量/个'
    ws['C2'] = '占比%'
    ws['D2'] = '面积/亩'
    ws['E2'] = '占比%'

    # --- c. Data rows ---
    current_row = 3
    for _, row_data in df.iterrows():
        row_name = row_data.get('属性分级', "")
        sample_val = row_data.get('样点数', 0)
        sample_pct_val = row_data.get('样点数占比', 0)
        area_val = row_data.get('制图面积_平差后', 0)
        area_pct_val = row_data.get('面积占比', 0)

        ws.cell(row=current_row, column=1).value = f"{row_name}" if row_name else "-"
        # Missing (NaN) or non-positive values are shown as "-".
        ws.cell(row=current_row, column=2).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
        ws.cell(row=current_row, column=3).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
        ws.cell(row=current_row, column=4).value = f"{area_val:.0f}" if area_val > 0 else "-"
        ws.cell(row=current_row, column=5).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"
        current_row += 1

    # Total row
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=2).value = df['样点数'].sum()
    ws.cell(row=current_row, column=3).value = '100'
    ws.cell(row=current_row, column=4).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=5).value = '100'

    # --- d. Styles and column widths ---
    apply_style(f'A1:E{current_row}', cell_font, center_align, thin_border)
    apply_style('A1:E2', header_font)

    print("正在自动调整列宽...")
    cjk_char = re.compile('[\u4e00-\u9fa5]')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
                # Horizontally merged cells would over-widen their first column.
                if get_merge_type(merged_range) == 'column':
                    continue
                text = str(cell.value)
                # CJK characters are wider, so they count 1.7 instead of 1.
                cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Create the "<soil_prop_name>土壤分级分布.xlsx" report.

    Intersects the land-use polygons with the reclassified polygons, tabulates
    the intersection per district ("行政区划" in ``gdb_path``), then builds and
    writes the statistics table. Errors are printed with a traceback and not
    re-raised; temporary in-memory datasets are removed in every case.

    Args:
        gdb_path: Geodatabase used as the ArcPy workspace.
        soil_prop_name: Soil property name (sample feature class / field name).
        reclassed_features_path: Polygon feature class of the reclassified raster.
        dltb_features: Land-use polygon feature class.
        output_path: Folder that receives the Excel report.
        target_area_dict: Target areas used for the area adjustment.
        xzqmc: Name of the region being processed.
        prop_config: Grading configuration of the property.
    """
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        # Only these regions are adjusted district by district.
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")

        temp_out_feature_class = r"in_memory/temp_out_feature_class"
        temp_out_tables_area = r"in_memory/temp_out_tables_area"
        # Register both in-memory outputs; the intersect result used to be
        # left behind in the in_memory workspace.
        temp_files.append(temp_out_feature_class)
        temp_files.append(temp_out_tables_area)

        # Intersect land-use polygons with the reclassified polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersection per district.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        # Build the statistics and write the report.
        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,513 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from tools.core.utils import arcgis_utils, common_utils
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
# --- 2. 辅助函数 ---
# Administrative-unit names grouped into two regions.
# NOTE(review): "xn" / "hn" presumably mean 西南 (south-west) and 华南 (south
# China), matching get_prop_level_for_xn_TRRZ / get_prop_level_for_hn_TRRZ -
# confirm against the code that reads these lists.
xn_region = ['天峨县', '寻甸县', '罗平县', '丘北县', '永仁县', '南华县', '双柏县', '武定县', '祥云县', '楚雄彝族自治州']
hn_region = ['北海市', '海城区', '银海区', '铁山港区', '港南区', '容县', '平南县', '兴宁区', '武鸣区', '邕宁区', '苍梧县', '靖西市', '西畴县', '马关县', '澜沧县', '双江县', '永德县']
# 计算属性等级
def get_prop_level(prop_level):
    """Map a grade code or grade name to its Roman-numeral level label.

    Integer codes 1-5 map to levels Ⅰ-Ⅴ, and codes 6-10 map to the same five
    levels again (6 -> Ⅰ ... 10 -> Ⅴ).  The grade names '等级一' .. '等级五'
    map to Ⅰ-Ⅴ as well.

    Args:
        prop_level: Grade code (int, float or numeric string) or grade name.

    Returns:
        One of "Ⅰ级" .. "Ⅴ级", or "-" when the value is missing, zero or not
        a recognised grade.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    labels = ("Ⅰ级", "Ⅱ级", "Ⅲ级", "Ⅳ级", "Ⅴ级")
    by_name = {'等级一': 0, '等级二': 1, '等级三': 2, '等级四': 3, '等级五': 4}
    # Names are resolved before int(): int('等级一') raises ValueError, which
    # previously made the name comparisons unreachable.
    if isinstance(prop_level, str) and prop_level in by_name:
        return labels[by_name[prop_level]]
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    if 1 <= code <= 10:
        # Codes 6-10 repeat the 1-5 cycle.
        return labels[(code - 1) % 5]
    return "-"
def get_prop_level_for_pH(prop_level):
    """Map a pH grade code or grade name to its Roman-numeral level label.

    The mapping is symmetric around code 5: 5 -> Ⅰ, 4/6 -> Ⅱ, 3/7 -> Ⅲ,
    2/8 -> Ⅳ, 1/9 -> Ⅴ.  The grade names '等级一' .. '等级九' follow the
    same mapping.

    Args:
        prop_level: Grade code (int, float or numeric string) or grade name.

    Returns:
        One of "Ⅰ级" .. "Ⅴ级", or "-" when the value is missing, zero or not
        a recognised grade.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    by_code = {
        5: "Ⅰ级",
        4: "Ⅱ级", 6: "Ⅱ级",
        3: "Ⅲ级", 7: "Ⅲ级",
        2: "Ⅳ级", 8: "Ⅳ级",
        1: "Ⅴ级", 9: "Ⅴ级",
    }
    by_name = {
        "等级五": "Ⅰ级",
        "等级四": "Ⅱ级", "等级六": "Ⅱ级",
        "等级三": "Ⅲ级", "等级七": "Ⅲ级",
        "等级二": "Ⅳ级", "等级八": "Ⅳ级",
        "等级一": "Ⅴ级", "等级九": "Ⅴ级",
    }
    # Names are resolved before int(): int("等级五") raises ValueError, which
    # previously made the name comparisons unreachable.
    if isinstance(prop_level, str) and prop_level in by_name:
        return by_name[prop_level]
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return by_code.get(code, "-")
def get_prop_level_for_hn_TRRZ(prop_level):
    """Map a TRRZ grade code or name to its level label for ``hn_region`` districts.

    Mapping: 3 -> Ⅰ, 4 -> Ⅱ, 2/5 -> Ⅲ, 6 -> Ⅳ, 1/7 -> Ⅴ.  The grade names
    '等级一' .. '等级七' follow the same mapping.

    Args:
        prop_level: Grade code (int, float or numeric string) or grade name.

    Returns:
        One of "Ⅰ级" .. "Ⅴ级", or "-" when the value is missing, zero or not
        a recognised grade.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    by_code = {
        3: "Ⅰ级",
        4: "Ⅱ级",
        2: "Ⅲ级", 5: "Ⅲ级",
        6: "Ⅳ级",
        1: "Ⅴ级", 7: "Ⅴ级",
    }
    by_name = {
        "等级三": "Ⅰ级",
        "等级四": "Ⅱ级",
        "等级二": "Ⅲ级", "等级五": "Ⅲ级",
        "等级六": "Ⅳ级",
        "等级一": "Ⅴ级", "等级七": "Ⅴ级",
    }
    # Names are resolved before int(): int("等级三") raises ValueError, which
    # previously made the name comparisons unreachable.
    if isinstance(prop_level, str) and prop_level in by_name:
        return by_name[prop_level]
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return by_code.get(code, "-")
def get_prop_level_for_xn_TRRZ(prop_level):
    """Map a TRRZ grade code or name to its level label for ``xn_region`` districts.

    Mapping: 4 -> Ⅰ, 3/5 -> Ⅱ, 6 -> Ⅲ, 2/7 -> Ⅳ, 1/8 -> Ⅴ.  The grade names
    '等级一' .. '等级八' follow the same mapping.

    Args:
        prop_level: Grade code (int, float or numeric string) or grade name.

    Returns:
        One of "Ⅰ级" .. "Ⅴ级", or "-" when the value is missing, zero or not
        a recognised grade.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    by_code = {
        4: "Ⅰ级",
        3: "Ⅱ级", 5: "Ⅱ级",
        6: "Ⅲ级",
        2: "Ⅳ级", 7: "Ⅳ级",
        1: "Ⅴ级", 8: "Ⅴ级",
    }
    by_name = {
        "等级四": "Ⅰ级",
        "等级三": "Ⅱ级", "等级五": "Ⅱ级",
        "等级六": "Ⅲ级",
        "等级二": "Ⅳ级", "等级七": "Ⅳ级",
        "等级一": "Ⅴ级", "等级八": "Ⅴ级",
    }
    # Names are resolved before int(): int("等级四") raises ValueError, which
    # previously made the name comparisons unreachable.
    if isinstance(prop_level, str) and prop_level in by_name:
        return by_name[prop_level]
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return by_code.get(code, "-")
# 等级计算
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
"""
Return a copy of the soil sample DataFrame with grading columns added.

When ``level_config`` is truthy and ``target_prop`` is a column of ``df``,
a ``GRIDCODE`` column is filled with the result of
``common_utils.vectorized_grade_assignment`` applied to the ``target_prop``
values and ``level_config["标准等级"]``.  A ``YJDL`` column holding the
first two characters of ``TDLYLX`` is also written.

NOTE(review): indentation is lost in this view, so it is unclear whether
the ``YJDL`` assignment sits inside the conditional — confirm.
"""
# Work on a copy so the caller's frame is not modified.
result_df = df.copy()
if level_config and target_prop in df.columns:
grade_standards = level_config["标准等级"]
grade_column = "GRIDCODE"
# Vectorised grade assignment (chosen for performance)
result_df[grade_column] = common_utils.vectorized_grade_assignment(
df[target_prop].values, grade_standards
)
# First two characters of the land-use type string become YJDL
result_df['YJDL'] = result_df['TDLYLX'].str[:2]
return result_df
# --- 3. 数据处理与分析 均值---
def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-grade sample-count and mapped-area table for one soil property.

    Reads the sample points of ``soil_prop_feature_name`` from the
    geodatabase, grades them and counts samples per grade.  The mapped areas
    in ``df_origin_area`` are then scaled to the target areas and both views
    are merged into one frame.

    Args:
        gdb_path: Path of the geodatabase holding the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is used as the value field read from it.
        df_origin_area: Intersection table; the columns ``YJDL_EJDL``,
            ``XZQMC``, ``GRIDCODE`` and ``temp_area`` are read.  The frame is
            copied, so the caller's object is left untouched.
        target_areas_dict: ``{district name: {land-use class: target area}}``.
        xzqmc: Name of the district the report is produced for.
        is_by_xzq: When true, adjust per district and land-use class;
            otherwise adjust per land-use class using
            ``target_areas_dict[xzqmc]``.
        prop_config: Grading configuration passed to
            ``process_soil_dataframe``.

    Returns:
        Tuple ``(df_final, stat_sample)``.  ``df_final`` has one row per
        (属性分级, GRIDCODE) with sample counts/shares and mapped areas/shares.
        ``stat_sample`` holds min, max, mean and median of the sample values.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # Normalise the given columns to stripped strings, then drop rows
        # whose value is one of the known null placeholders.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # Select the grading function once, from the caller-supplied district
    # name, so the sample side and the map side always use the same rule.
    if soil_prop_feature_name == 'PH':
        level_func = get_prop_level_for_pH
    elif soil_prop_feature_name == 'TRRZ' and xzqmc in hn_region:
        level_func = get_prop_level_for_hn_TRRZ
    elif soil_prop_feature_name == 'TRRZ' and xzqmc in xn_region:
        level_func = get_prop_level_for_xn_TRRZ
    else:
        level_func = get_prop_level

    # == a. Sample points: grade them and count samples per grade ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(level_func)

    # Overall statistics of all sample values.
    processed_df[field_name] = processed_df[field_name].astype('float')
    stat_sample = {
        'min': processed_df[field_name].min(),
        'max': processed_df[field_name].max(),
        'mean': processed_df[field_name].mean(),
        'median': processed_df[field_name].median(),
    }

    df_sample_means = processed_df.groupby(['属性分级', 'GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # == b. Mapped areas per grade ==
    # Copy first so the caller's frame does not gain a YJDL column.
    df_origin_area = df_origin_area.copy()
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    # Properties that are reported for cultivated land and orchards only.
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    if soil_prop_feature_name in filtered_props:
        farmland_yjdl = ['耕地', '园地']
        df_origin_area = df_origin_area[df_origin_area['YJDL'].isin(farmland_yjdl)]
        print(f"过滤制图数据仅统计耕地和园地YJDL in {farmland_yjdl}")
    # GZCHD is reported for cultivated land only.
    if soil_prop_feature_name in ['GZCHD']:
        df_origin_area = df_origin_area[df_origin_area['YJDL'] == '耕地']
        print(f"过滤制图数据GZCHD仅统计耕地")
    df_map_data = df_origin_area.groupby(["XZQMC", "YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()

    # Area adjustment: scale every (district,) land-use total to its target.
    # Best effort: a failure is reported and the areas adjusted so far are kept.
    try:
        if is_by_xzq:
            df_map_data['adjusted_area'] = df_map_data['temp_area']
            df_map_data['adjustment_factor'] = 1.0
            existing_districts = df_map_data['XZQMC'].unique()
            # Districts present in the data but without target areas.
            missing_districts = [ed for ed in existing_districts if ed not in target_areas_dict]
            if missing_districts:
                print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
            original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()
            # The loop variable must not be called xzqmc: that would overwrite
            # the parameter identifying the report district.
            for district_name, landuse_targets in target_areas_dict.items():
                for yjdl, target_area in landuse_targets.items():
                    key = (district_name, yjdl)
                    if key in original_totals.index and original_totals[key] > 0:
                        adjustment_factor = target_area / original_totals[key]
                        mask = (df_map_data['XZQMC'] == district_name) & (df_map_data['YJDL'] == yjdl)
                        df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                        df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
        else:
            original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
            # An unknown district means "no targets", hence the empty dict.
            target_area_dict = target_areas_dict.get(xzqmc, {})
            for yjdl, target_area in target_area_dict.items():
                original_total = original_totals.get(yjdl, 0)
                # Only classes with mapped area can be scaled; dividing by a
                # missing or zero total would abort all later adjustments.
                if original_total > 0:
                    adjustment_factor = target_area / original_total
                    mask = df_map_data['YJDL'] == yjdl
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                    df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
    except Exception as e:
        print(f"平差处理失败: {e}")

    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(level_func)
    df_map_areas = df_map_data.groupby(['属性分级', 'GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # == c. Merge both views on every (grade, code) pair seen on either side ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级', 'GRIDCODE']],
        df_map_areas[['属性分级', 'GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级', 'GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级', 'GRIDCODE'], how='left')
    df_final.sort_values(['属性分级'], inplace=True)
    print("数据处理流程完成!")
    return df_final, stat_sample
# --- 3. Excel 制表 总表---
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config, soil_prop_tif, stat_sample):
"""
Write the per-grade sample and area statistics to a formatted Excel file.

Args:
    df: Frame with the columns 属性分级, GRIDCODE, 样点数, 样点数占比,
        制图面积_平差后 and 面积占比 (the columns read below).
    output_path: Path of the .xlsx file that is saved.
    prop_config: Dict providing '标准等级' (required) and, optionally,
        '项目分级' and '分级标准'.
    soil_prop_tif: Only referenced inside the disabled raster-statistics
        string below; the live code does not use it.
    stat_sample: Dict with 'min', 'max', 'mean' and 'median', written to the
        summary rows at the bottom of the sheet.
"""
if df.empty:
print("警告: 没有数据可以写入 Excel将创建一个空的报告。")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws['A1'] = "没有有效的统计数据。"
wb.save(output_path)
return
# Whole-area raster statistics: disabled, kept below as a bare string literal
"""
try:
raster = arcpy.Raster(soil_prop_tif)
# 转换为numpy数组进行计算
array = arcpy.RasterToNumPyArray(raster,nodata_to_value=9999)
# 过滤掉NoData值
# 过滤NoData值和9999值
array = array[~np.isnan(array)] # 过滤NoData
array = array[array != 9999] # 过滤9999
array = array.astype(np.float64)
stats = {
'min': round(np.min(array),2),
'max': round(np.max(array),2),
'mean': round(np.mean(array),2),
'median': round(np.median(array),2),
'std': round(np.std(array),2)
}
except Exception as e:
print(f"错误: {e}")
"""
# Whole-area statistics come from the sample points
stats = stat_sample
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
# NOTE(review): this title mentions acidification although the sheet holds
# generic property grades; looks copied from another report — confirm.
ws.title = "行政区酸化程度等级分布及占比"
# Choose the number format for the summary rows from the property name:
# '1f' for special_prop, '0f' for fsn_props, '2f' otherwise
special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅']
fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
prop_name_str = prop_config.get('项目分级','')
if prop_name_str:
# The property name is the first line of the '项目分级' text
split_name = prop_name_str.split('\n')[0].strip()
if split_name in special_prop:
prop_name = '1f'
elif split_name in fsn_props:
prop_name = '0f'
else:
prop_name = '2f'
else:
prop_name = '1f'
# print(prop_name_str, prop_name)
prop_unit_str = prop_config.get('分级标准', '')
if prop_unit_str:
# The unit is taken from the second line of the '分级标准' text;
# a single-line value raises IndexError here
prop_unit = prop_unit_str.split('\n')[1].strip()
else:
prop_unit = ''
# --- b. Header rows ---
ws.merge_cells('A1:B1'); ws['A1'] = '土壤三普分级'
ws.merge_cells('C1:D1'); ws['C1'] = '样点统计'
ws.merge_cells('E1:F1'); ws['E1'] = '制图统计'
ws['A2'] = '分级'; ws['B2'] = '值域/' + prop_unit if prop_unit else '值域'
ws['C2'] = '数量/个'; ws['D2'] = '占比%'
ws['E2'] = '面积/亩'; ws['F2'] = '占比%'
acid_levels = ['Ⅰ级','Ⅱ级', 'Ⅲ级', 'Ⅳ级', 'Ⅴ级']
level_dict = prop_config['标准等级']
# Two dicts keyed by grade code: the first and the second value range of each grade
upper_ranges = {}
lower_ranges = {}
# Walk the grades in dict order (the sort key is each key's own position,
# so the order is unchanged); i is the 1-based grade code
for i, (level, ranges) in enumerate(sorted(level_dict.items(), key=lambda x: list(level_dict.keys()).index(x[0])), 1):
# Split the comma-separated range text
range_list = [r.strip() for r in ranges.split(',')]
if len(range_list) >= 1:
upper_ranges[i] = range_list[0]
if len(range_list) >= 2:
# The second range is stored under code i + number of grades
lower_index = i + len(level_dict)
lower_ranges[lower_index] = range_list[1]
# Merge both dicts into one code -> range lookup
upper_ranges.update(lower_ranges)
# --- c. Data rows ---
current_row = 3
df_to_write = df.copy() # work on a copy to avoid SettingWithCopyWarning
for yl, group_yl_df in df_to_write.groupby('属性分级', sort=False, observed=False):
yl_start_row = current_row
# 1. One sheet row per frame row belonging to this grade label
for _, row_data in group_yl_df.iterrows():
ws.cell(row=current_row, column=2).value = upper_ranges.get(row_data['GRIDCODE'], '-')
# --- Fill the value cells ---
col_start = 3 # filling starts at column C
# Guard against an empty row Series
if not row_data.empty:
# 1. Names of the columns to read from the row
sample_col = f'样点数'
sample_pct_col = f'样点数占比'
area_col = f'制图面积_平差后'
area_pct_col = f'面积占比'
# 2. Read the values, defaulting to 0 when a column is absent
sample_val = row_data.get(sample_col, 0)
sample_pct_val = row_data.get(sample_pct_col, 0)
area_val = row_data.get(area_col, 0)
area_pct_val = row_data.get(area_pct_col, 0)
# 3. Write the values; anything not greater than 0 is shown as "-"
ws.cell(row=current_row, column=col_start).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
# Sample share / %
ws.cell(row=current_row, column=col_start + 1).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
# Mapped area
ws.cell(row=current_row, column=col_start + 2).value = f"{area_val:.0f}" if area_val > 0 else "-"
# Area share / %
ws.cell(row=current_row, column=col_start + 3).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"
# col_start is reassigned to 3 at the top of the next iteration
col_start += 2
else:
for _ in range(4):
ws.cell(row=current_row, column=col_start).value = "-"
col_start += 1
current_row += 1
# Merge the grade-label cells in column A
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row-1, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
# 2. Totals row
ws.cell(row=current_row, column=1).value = '全区'
ws.cell(row=current_row, column=2).value = '-'
ws.cell(row=current_row, column=3).value = df['样点数'].sum()
ws.cell(row=current_row, column=4).value = '100'
ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
ws.cell(row=current_row, column=6).value = '100'
# 3. Summary rows (mean, median, range), each merged across B..F
ws.merge_cells(f'B{current_row + 1}:F{current_row + 1}')
ws.cell(row=current_row + 1, column=1).value = '全区均值'
ws.cell(row=current_row + 1, column=2).value = f'{stats["mean"]:.{prop_name}}'
ws.merge_cells(f'B{current_row + 2}:F{current_row + 2}')
ws.cell(row=current_row + 2, column=1).value = '全区中位值'
ws.cell(row=current_row + 2, column=2).value = f'{stats["median"]:.{prop_name}}'
ws.merge_cells(f'B{current_row + 3}:F{current_row + 3}')
ws.cell(row=current_row + 3, column=1).value = '全区范围'
ws.cell(row=current_row + 3, column=2).value = f'{stats["min"]:.{prop_name}} {stats["max"]:.{prop_name}}'
# --- a. Style definitions ---
header_font = Font(name='宋体', size=11, bold=True)
# --- d. Apply styles and adjust column widths ---
if current_row > 1: # only style the sheet when rows were written
ExcelStyleUtils.set_style(ws, f'A1:F{current_row+3}')
ExcelStyleUtils.set_style(ws, f'A1:F2', header_font)
# Adjust column widths
ExcelStyleUtils.auto_adjust_column_width(ws)
# --- e. Save the workbook ---
wb.save(output_path)
print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, soil_prop_tif, output_path, target_area_dict,xzqmc, prop_config):
    """Produce the grade-distribution Excel report for one soil property.

    Intersects the land-use parcels with the reclassified grade polygons,
    tabulates the intersection per district, then delegates to
    ``process_data_for_table1`` and ``write_to_excel_table1``.  Errors are
    printed with a traceback instead of being raised, and the temporary
    ``in_memory`` datasets are cleaned up in every case.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also used in the output file name.
        reclassed_features_path: Polygon features of the reclassified raster.
        dltb_features: Land-use parcel features.
        soil_prop_tif: Property raster, forwarded to the Excel writer.
        output_path: Directory the .xlsx report is written to.
        target_area_dict: Target areas forwarded to the data processing step.
        xzqmc: Name of the district the report is produced for.
        prop_config: Grading configuration of the property.
    """
    # Created before the try block so the finally clause can always run.
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        is_by_xzq = xzqmc in ["北海市","来宾市","楚雄自治州"]
        temp_out_feature_class = r"in_memory/temp_out_feature_class"
        temp_out_tables_area = r"in_memory/temp_out_tables_area"
        # Register both temporary datasets; the intersect output was
        # previously left behind in the in_memory workspace.
        # NOTE(review): assumes clean_up_temp_files can delete a feature
        # class as well as a table — confirm.
        temp_files.append(temp_out_feature_class)
        temp_files.append(temp_out_tables_area)
        # Intersect the land-use parcels with the reclassified grade polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersection result per district.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)
        # Build the statistics and write the Excel report.
        final_dataframe,stat = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)
        write_to_excel_table1(final_dataframe, output_excel_path, prop_config, soil_prop_tif, stat)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
# Display order of the first-level land-use classes (YJDL)
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
# Preferred display order of the second-level land-use classes (EJDL)
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# The twelve soil texture classes
trzd_order = ['砂土及壤质砂土', '砂质壤土','壤土','粉(砂)质壤土','砂质黏壤土','黏壤土','粉(砂)质黏壤土','砂质黏土','壤质黏土','粉(砂)质黏土','黏土','重黏土']
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged cell range by the direction(s) it spans.

    Returns 'both' when the range covers several rows and several columns,
    'row' when it covers several rows only, 'column' when it covers several
    columns only, and None for a falsy argument or a single-cell range.
    """
    if not merged_range:
        return None
    first_row, last_row = merged_range.min_row, merged_range.max_row
    first_col, last_col = merged_range.min_col, merged_range.max_col
    spans_rows = last_row > first_row
    spans_cols = last_col > first_col
    if spans_rows:
        # Several rows: vertical merge, possibly horizontal as well.
        return 'both' if spans_cols else 'row'
    # Single row: either a horizontal merge or no merge at all.
    return 'column' if spans_cols else None
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
"""
Build the land-use by grade-code table of sample counts and mapped areas.

Args:
    gdb_path: Geodatabase holding the sample feature class.
    soil_prop_feature_name: Name of the sample feature class; the same name
        is used as the value field read from it.
    df_dltb: Frame with 'YJDL_EJDL', a grade-code column and an area column
        ('GRIDCODE' and 'AREA' after the column names are upper-cased).
        It is modified in place.
    target_areas_df: Target areas per second-level class; needs a '面积'
        column and either an 'EJDL' column or an index that becomes one.

Returns:
    Frame with one row per (YJDL, EJDL, GRIDCODE), sorted by those keys,
    carrying sample counts/shares and raw and adjusted mapped areas.
"""
print("开始处理数据...")
def clean_df(df, columns):
# Strip the given columns as strings, then drop rows holding a null placeholder.
# NOTE(review): astype(str) presumably turns missing values into the string
# 'nan', which is not in the replacement list — confirm.
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# == a. Sample points: count samples per land-use class and grade code ==
print("--> 步骤1: 计算样点均值...")
field_name = soil_prop_feature_name
sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
sample_fields = ['TDLYLX', field_name]
df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
df_samples = clean_df(df_samples, [field_name])
df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
# The property value itself is used as the integer grade code
df_samples["GRIDCODE"] = df_samples[field_name].astype(int)
# Group by YJDL, EJDL and GRIDCODE and count the samples in each group
df_sample_means = df_samples.groupby(['YJDL', 'EJDL', 'GRIDCODE']).size().reset_index(name="样点数")
total_sample_count = df_sample_means['样点数'].sum()
# Share as a fraction (0-1) of all samples
df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count
# == b. Mapped data: area per land-use class and grade code ==
df_dltb["YJDL"] = df_dltb['YJDL_EJDL'].apply(lambda x: x.split('_')[0])
df_dltb["EJDL"] = df_dltb["YJDL_EJDL"].apply(lambda x: x.split('_')[1])
df_dltb.columns = df_dltb.columns.str.upper()
df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
df_map_data = df_dltb.groupby(["YJDL","EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015 # unit: mu (assumes AREA is in square metres — TODO confirm)
# df_map_data['面积占比'] = df_map_data['制图面积'] / df_map_data['制图面积'].sum()
# Step 2: normalise the target-area table
target_areas_df = target_areas_df.copy()
target_areas_df.columns = target_areas_df.columns.str.strip() # strip blanks from the column names
# Make sure EJDL is a column rather than the index
if 'EJDL' not in target_areas_df.columns:
target_areas_df = target_areas_df.reset_index()
target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
# Coerce the area column to numbers; unparsable values become 0
target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)
# Step 3: adjustment factor per second-level class
# Raw mapped total per second-level class
ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
# Attach the target areas
ejdl_adj = pd.merge(ejdl_original_sum, target_areas_df, on='EJDL', how='left')
ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
# Classes without a target keep their raw total, giving a factor of 1
ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
# Factor = target / raw; a raw total of 0 is replaced by 1 to avoid dividing by zero
ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1) # fall back to 1 for any remaining NaN
# Step 4: apply the factor to every grade row of the class
df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1) # unmatched classes use a factor of 1
# Adjusted mapped area
df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
# Area share as a fraction (0-1) of the adjusted total
total_adjusted_area = df_map_data['制图面积'].sum()
df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
df_map_data = clean_df(df_map_data, ['YJDL', 'EJDL'])
# --- c. Merge both views ---
print("--> 步骤3: 合并数据...")
# Every key combination that occurs on either side
df_skeleton = pd.concat([
df_sample_means[['YJDL', 'EJDL', 'GRIDCODE']],
df_map_data[['YJDL', 'EJDL', 'GRIDCODE']]
]).drop_duplicates().reset_index(drop=True)
df_final = pd.merge(df_skeleton, df_sample_means, on=['YJDL', 'EJDL', 'GRIDCODE'], how='left')
df_final = pd.merge(df_final, df_map_data, on=['YJDL', 'EJDL', 'GRIDCODE'], how='left')
# Sort by first- and second-level class; EJDL values missing from ejdl_order are appended
in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
# NOTE(review): YJDL values outside yjdl_order presumably become NaN here — confirm.
df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
df_final.sort_values(['YJDL', 'EJDL', 'GRIDCODE'], inplace=True)
print("数据处理流程完成!")
return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config):
"""
Write the land-use by texture statistics to a formatted Excel workbook.

Args:
    df: Frame with the columns YJDL, EJDL, GRIDCODE, 样点数, 样点数占比,
        制图面积 and 面积占比 (the columns read below).
    output_path: Path of the .xlsx file that is saved.
    prop_config: Dict whose '标准等级' entry maps texture names to codes;
        it is inverted to label each row.

Nothing is written when ``df`` is empty.
"""
if df.empty:
print("警告: 没有数据可以写入 Excel。")
return
print(f"开始生成 Excel 报告到 '{output_path}'...")
wb = Workbook()
ws = wb.create_sheet("Mysheet", 0)
ws.title = "不同土地利用类型属性变化统计"
# --- a. Style definitions ---
header_font = Font(name='等线', size=11, bold=True)
cell_font = Font(name='等线', size=11)
center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
top=Side(style='thin'), bottom=Side(style='thin'))
def apply_style(cell_range, font, alignment=None, border=None):
# Apply the font, and optionally alignment and border, to every cell of the range.
for row in ws[cell_range]:
for cell in row:
cell.font = font
if alignment: cell.alignment = alignment
if border: cell.border = border
# --- b. Header rows ---
ws.merge_cells('A1:B1'); ws['A1'] = '土地利用类型'
ws.merge_cells('C1:E1'); ws['C1'] = '样点统计'
ws.merge_cells('F1:G1'); ws['F1'] = '制图统计'
ws['A2'] = '一级'
ws['B2'] = '二级'
ws['C2'] = '质地类型'
ws['D2'] = '数量/个'
ws['E2'] = '占比%'
ws['F2'] = '面积/亩'
ws['G2'] = '占比%'
level_dict = prop_config['标准等级']
# Inverted lookup: value of '标准等级' -> its key
upper_ranges = {value: key for key, value in level_dict.items()}
# --- c. Data rows ---
current_row = 3
df_to_write = df.copy() # work on a copy to avoid SettingWithCopyWarning
for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
# observed=False also yields unused categories; skip those
if group_yl_df.empty:
continue
print(f"正在写入一级地类: {yl}...")
yl_start_row = current_row
# Group by second-level land-use class
for ej, group_ej_df in group_yl_df.groupby('EJDL', sort=False, observed=False):
if group_ej_df.empty:
continue
print(f"正在写入二级地类: {ej}...")
ej_start_row = current_row
# One sheet row per texture grade of this class
for idx, row_data in group_ej_df.iterrows():
# Texture label, looked up by the stringified grade code
ws.cell(row=current_row, column=3).value = upper_ranges.get(str(row_data['GRIDCODE']), '-')
# Sample statistics; NaN is shown as '-'
ws.cell(row=current_row, column=4).value = row_data['样点数'] if not np.isnan(row_data['样点数']) else '-'
ws.cell(row=current_row, column=5).value = round(row_data['样点数占比']*100, 2) if not np.isnan(row_data['样点数占比']) else '-'
# Mapped-area statistics; NaN is shown as '-'
ws.cell(row=current_row, column=6).value = round(row_data['制图面积'], 0) if not np.isnan(row_data['制图面积']) else '-'
ws.cell(row=current_row, column=7).value = round(row_data['面积占比']*100, 2) if not np.isnan(row_data['面积占比']) else '-'
current_row += 1
# Merge the second-level class cells in column B
if ej_start_row <= current_row:
ws.merge_cells(start_row=ej_start_row, start_column=2, end_row=current_row-1, end_column=2)
ws.cell(row=ej_start_row, column=2).value = ej
# Subtotal row of the first-level class
ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
ws.cell(row=current_row, column=2).value = '合计'
ws.cell(row=current_row, column=4).value = round(group_yl_df['样点数'].sum(), 0) if not np.isnan(group_yl_df['样点数'].sum()) else '-'
ws.cell(row=current_row, column=5).value = round(group_yl_df['样点数占比'].sum()*100, 2) if not np.isnan(group_yl_df['样点数占比'].sum()) else '-'
ws.cell(row=current_row, column=6).value = round(group_yl_df['制图面积'].sum(), 0) if not np.isnan(group_yl_df['制图面积'].sum()) else '-'
ws.cell(row=current_row, column=7).value = round(group_yl_df['面积占比'].sum()*100, 2) if not np.isnan(group_yl_df['面积占比'].sum()) else '-'
# Merge the first-level class cells in column A, subtotal row included
if yl_start_row <= current_row:
ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
ws.cell(row=yl_start_row, column=1).value = yl
current_row += 1
# --- 5. Whole-area summary row ---
ws.cell(row=current_row, column=1).value = '全区汇总'
ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
ws.cell(row=current_row, column=4).value = round(df_to_write['样点数'].sum(), 0) if not np.isnan(df_to_write['样点数'].sum()) else '-'
ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum()*100, 2) if not np.isnan(df_to_write['样点数占比'].sum()) else '-'
ws.cell(row=current_row, column=6).value = round(df_to_write['制图面积'].sum(), 0) if not np.isnan(df_to_write['制图面积'].sum()) else '-'
ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum()*100, 2) if not np.isnan(df_to_write['面积占比'].sum()) else '-'
# --- d. Apply styles and adjust column widths ---
max_col_letter = get_column_letter(ws.max_column)
if current_row > 1: # only style the sheet when rows were written
apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
apply_style(f'A1:{max_col_letter}2', header_font)
print("正在自动调整列宽...")
# Widest text seen per column
dims = {}
for row in ws.rows:
for cell in row:
if cell.value:
merged_range = next((range for range in ws.merged_cells.ranges if cell.coordinate in range), None)
# Horizontally merged cells do not contribute to a column's width
if get_merge_type(merged_range) == 'column':
continue
# Characters in the CJK range count as 1.7 width units, all others as 1
cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', str(cell.value))) + len(str(cell.value))
dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
# Set the column widths, with 5 units of padding
for col, value in dims.items():
ws.column_dimensions[get_column_letter(int(col))].width = value + 5
# --- e. Save the workbook ---
wb.save(output_path)
print("Excel 报告生成成功!")
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
# print(target_areas_df)
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path,prop_config)
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
# Produce the land-use by texture Excel report for one soil property.
# Tabulates the area of each grade code per YJDL_EJDL class, then calls
# process_data_for_table2 and write_to_excel_table2. Errors are printed with
# a traceback rather than raised; the temporary table is cleaned up afterwards.
try:
# --- 1. Configuration ---
# Output settings
temp_files = []
output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx") # path of the generated Excel report
# Set the arcpy workspace and environment
arcpy.env.workspace = gdb_path
arcpy.env.overwriteOutput = True
print("开始处理数据...")
out_table_mean = r"in_memory/out_table_mean"
temp_files.append(out_table_mean)
# An already existing table is reused instead of being recomputed
if not arcpy.Exists(out_table_mean):
# 2. Tabulate the area of every grade code per land-use class
arcpy.analysis.TabulateIntersection(dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean, "gridcode", out_units="SQUARE_METERS")
dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"]))
# Build the statistics and write the Excel report
final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
# return df_with_factors
except Exception as e:
print(f"\n处理过程中发生严重错误: {e}")
import traceback
traceback.print_exc()
finally:
temp_files_processor.clean_up_temp_files(temp_files)
import gc
gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path)

View File

@@ -0,0 +1,336 @@
# -*- coding: utf-8 -*-
import os
import re
from matplotlib.artist import get
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
# Display order of the first-level land-use classes (YJDL)
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
# Preferred display order of the second-level land-use classes (EJDL)
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# The five soil texture groups
trzd_order = ['黏壤质','黏质','壤质','砂壤质','砂质']
# --- 2. 辅助函数 ---
def get_prop_level(prop_level):
    """Map a texture grade code or texture name to the texture group name.

    Codes: 1 -> 黏壤质, 2 -> 黏质, 3 -> 壤质, 4 -> 砂壤质, 5 -> 砂质.
    A value that already is one of these five names is returned unchanged.

    Args:
        prop_level: Grade code (int, float or numeric string) or group name.

    Returns:
        The texture group name, or "-" when the value is missing, zero or
        not a recognised grade.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    by_code = {1: "黏壤质", 2: "黏质", 3: "壤质", 4: "砂壤质", 5: "砂质"}
    # Names are resolved before int(): int("砂质") raises ValueError, which
    # previously made the name comparisons unreachable.
    if isinstance(prop_level, str) and prop_level in by_code.values():
        return prop_level
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return by_code.get(code, "-")
# 判断单元格类型
def get_merge_type(merged_range):
    """Tell in which direction(s) a merged cell range extends.

    Returns 'both' for a range spanning several rows and several columns,
    'row' for several rows only, 'column' for several columns only, and None
    when the argument is falsy or the range is a single cell.
    """
    if not merged_range:
        return None
    top, bottom = merged_range.min_row, merged_range.max_row
    left, right = merged_range.min_col, merged_range.max_col
    multi_row = bottom > top
    multi_col = right > left
    if multi_row and multi_col:
        return 'both'
    if multi_row:
        return 'row'
    if multi_col:
        return 'column'
    # A single-cell range is not really a merge.
    return None
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
"""
Build the land-use by texture-group table of sample counts and mapped areas.

Args:
    gdb_path: Geodatabase holding the sample feature class.
    soil_prop_feature_name: Name of the sample feature class; the same name
        is used as the value field read from it.
    df_dltb: Frame with 'YJDL_EJDL', a grade-code column and an area column
        ('GRIDCODE' and 'AREA' after the column names are upper-cased).
        It is modified in place.
    target_areas_df: Target areas per second-level class; needs a '面积'
        column and either an 'EJDL' column or an index that becomes one.

Returns:
    Frame with one row per (YJDL, EJDL, GRIDCODE), sorted by those keys,
    carrying sample counts/shares and raw and adjusted mapped areas.
"""
print("开始处理数据...")
def clean_df(df, columns):
# Strip the given columns as strings, then drop rows holding a null placeholder.
# NOTE(review): astype(str) presumably turns missing values into the string
# 'nan', which is not in the replacement list — confirm.
for col in columns:
df[col] = df[col].astype(str).str.strip()
df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
df.dropna(subset=columns, inplace=True)
return df
# == a. Sample points: count samples per land-use class and texture value ==
print("--> 步骤1: 计算样点均值...")
field_name = soil_prop_feature_name
sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
sample_fields = ['TDLYLX', field_name]
df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
df_samples = clean_df(df_samples, [field_name])
df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
# The (stringified) property value is used directly as the group key
df_samples["GRIDCODE"] = df_samples[field_name]
# Group by YJDL, EJDL and GRIDCODE and count the samples in each group
df_sample_means = df_samples.groupby(['YJDL', 'EJDL', 'GRIDCODE']).size().reset_index(name="样点数")
total_sample_count = df_sample_means['样点数'].sum()
# Share as a fraction (0-1) of all samples
df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count
# == b. Mapped data: area per land-use class and texture group ==
df_dltb["YJDL"] = df_dltb['YJDL_EJDL'].apply(lambda x: x.split('_')[0])
df_dltb["EJDL"] = df_dltb["YJDL_EJDL"].apply(lambda x: x.split('_')[1])
df_dltb.columns = df_dltb.columns.str.upper()
df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
# Translate the grade codes into texture group names
df_dltb['GRIDCODE'] = df_dltb['GRIDCODE'].apply(get_prop_level)
df_map_data = df_dltb.groupby(["YJDL","EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015 # unit: mu (assumes AREA is in square metres — TODO confirm)
# df_map_data['面积占比'] = df_map_data['制图面积'] / df_map_data['制图面积'].sum()
# Step 2: normalise the target-area table
target_areas_df = target_areas_df.copy()
target_areas_df.columns = target_areas_df.columns.str.strip() # strip blanks from the column names
# Make sure EJDL is a column rather than the index
if 'EJDL' not in target_areas_df.columns:
target_areas_df = target_areas_df.reset_index()
target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
# Coerce the area column to numbers; unparsable values become 0
target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)
# Step 3: adjustment factor per second-level class
# Raw mapped total per second-level class
ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
# Attach the target areas
ejdl_adj = pd.merge(ejdl_original_sum, target_areas_df, on='EJDL', how='left')
ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
# Classes without a target keep their raw total, giving a factor of 1
ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
# Factor = target / raw; a raw total of 0 is replaced by 1 to avoid dividing by zero
ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1) # fall back to 1 for any remaining NaN
# Step 4: apply the factor to every texture row of the class
df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1) # unmatched classes use a factor of 1
# Adjusted mapped area
df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
# Area share as a fraction (0-1) of the adjusted total
total_adjusted_area = df_map_data['制图面积'].sum()
df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
df_map_data = clean_df(df_map_data, ['YJDL', 'EJDL'])
# --- c. Merge both views ---
print("--> 步骤3: 合并数据...")
# Every key combination that occurs on either side
df_skeleton = pd.concat([
df_sample_means[['YJDL', 'EJDL', 'GRIDCODE']],
df_map_data[['YJDL', 'EJDL', 'GRIDCODE']]
]).drop_duplicates().reset_index(drop=True)
df_final = pd.merge(df_skeleton, df_sample_means, on=['YJDL', 'EJDL', 'GRIDCODE'], how='left')
df_final = pd.merge(df_final, df_map_data, on=['YJDL', 'EJDL', 'GRIDCODE'], how='left')
# Sort by first- and second-level class; EJDL values missing from ejdl_order are appended
in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
# NOTE(review): YJDL values outside yjdl_order presumably become NaN here — confirm.
df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
df_final.sort_values(['YJDL', 'EJDL', 'GRIDCODE'], inplace=True)
print("数据处理流程完成!")
return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config):
    """Write the land-use / texture-class statistics to a formatted Excel file.

    One worksheet row is written per record of ``df``. Records are grouped by
    ``YJDL`` (level-1 land use) and ``EJDL`` (level-2 land use); every level-1
    group is followed by a subtotal row ('合计') and the sheet ends with a
    grand-total row ('全区汇总').

    Args:
        df: DataFrame with the columns ``YJDL``, ``EJDL``, ``GRIDCODE``,
            ``样点数``, ``样点数占比``, ``制图面积`` and ``面积占比``.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration. Kept for interface compatibility;
            this writer does not read it.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply the font and, when given, alignment/border to each cell of the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def number_or_dash(value, scale=1, digits=None):
        """Return ``value * scale`` (rounded when ``digits`` is set) or '-' if missing."""
        # pd.isna also accepts None / object values, where np.isnan raises TypeError.
        if pd.isna(value):
            return '-'
        scaled = value * scale
        return scaled if digits is None else round(scaled, digits)

    # --- b. Header (two rows) ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'
    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()  # never write through to the caller's frame
    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        # observed=False may yield categories that have no rows at all.
        if group_yl_df.empty:
            continue
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        for ej, group_ej_df in group_yl_df.groupby('EJDL', sort=False, observed=False):
            if group_ej_df.empty:
                continue
            print(f"正在写入二级地类: {ej}...")
            ej_start_row = current_row
            for _, row_data in group_ej_df.iterrows():
                ws.cell(row=current_row, column=3).value = str(row_data['GRIDCODE'])
                # Sample statistics
                ws.cell(row=current_row, column=4).value = number_or_dash(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = number_or_dash(row_data['样点数占比'], 100, 2)
                # Mapped statistics
                ws.cell(row=current_row, column=6).value = number_or_dash(row_data['制图面积'], digits=0)
                ws.cell(row=current_row, column=7).value = number_or_dash(row_data['面积占比'], 100, 2)
                current_row += 1
            # Merge the level-2 label only when it spans more than one row;
            # a 1x1 merge has no visual effect.
            if current_row - 1 > ej_start_row:
                ws.merge_cells(start_row=ej_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ej_start_row, column=2).value = ej

        # Subtotal row of the level-1 class
        ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
        ws.cell(row=current_row, column=2).value = '合计'
        ws.cell(row=current_row, column=4).value = number_or_dash(group_yl_df['样点数'].sum(), digits=0)
        ws.cell(row=current_row, column=5).value = number_or_dash(group_yl_df['样点数占比'].sum(), 100, 2)
        ws.cell(row=current_row, column=6).value = number_or_dash(group_yl_df['制图面积'].sum(), digits=0)
        ws.cell(row=current_row, column=7).value = number_or_dash(group_yl_df['面积占比'].sum(), 100, 2)
        # The level-1 label spans the data rows plus the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Grand-total row ---
    ws.cell(row=current_row, column=1).value = '全区汇总'
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=4).value = number_or_dash(df_to_write['样点数'].sum(), digits=0)
    ws.cell(row=current_row, column=5).value = number_or_dash(df_to_write['样点数占比'].sum(), 100, 2)
    ws.cell(row=current_row, column=6).value = number_or_dash(df_to_write['制图面积'].sum(), digits=0)
    ws.cell(row=current_row, column=7).value = number_or_dash(df_to_write['面积占比'].sum(), 100, 2)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
                # Horizontally merged cells would distort the width of their first column.
                if get_merge_type(merged_range) == 'column':
                    continue
                # CJK characters get an extra 0.7 on top of the plain character count.
                cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', str(cell.value))) + len(str(cell.value))
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
# print(target_areas_df)
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path,prop_config)
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Tabulate grade areas per land-use class and write the Excel report.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Soil property name; also used in the output file name.
        dltb_features: Land-use polygons carrying a ``YJDL_EJDL`` zone field.
        reclassed_feature: Reclassified polygons carrying a ``gridcode`` field.
        output_path: Directory that receives the ``.xlsx`` report.
        target_areas_df: Target areas forwarded to ``process_data_for_table2``.
        prop_config: Property configuration forwarded to the Excel writer.

    Any exception is printed together with its traceback and not re-raised;
    temporary datasets are cleaned up in every case.
    """
    out_table_mean = r"in_memory/out_table_mean"
    # Built before the try block so the finally clause can always rely on it.
    temp_files = [out_table_mean]
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        # Always recompute: with overwriteOutput enabled there is nothing to gain
        # from reusing a table that an earlier run may have left behind.
        arcpy.analysis.TabulateIntersection(dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean, "gridcode", out_units="SQUARE_METERS")
        dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"]))
        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
# write_to_excel_table2(df,output_path)

View File

@@ -0,0 +1,328 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
# --- 3. 数据处理与分析 均值---
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine sample statistics and mapped statistics per land-use class.

    Args:
        gdb_path: Geodatabase that contains the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same name
            is used as the value field that is summarised.
        df_dltb: Zonal table with a ``YJDL_EJDL`` column ("<level1>_<level2>").
            Its ``MEAN`` / ``COUNT`` columns are renamed to ``制图均值`` /
            ``制图样点数``. The frame is copied, not modified.
        target_areas_df: Frame with an ``EJDL`` column, left-joined on ``EJDL``.

    Returns:
        DataFrame with one row per (YJDL, EJDL) holding ``count``/``max``/
        ``min``/``mean`` of the samples plus the columns of ``df_dltb`` and
        ``target_areas_df``, sorted by the configured land-use order.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        """Strip ``columns`` as strings and drop rows holding a null marker in them."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    def with_extras(preferred, values):
        """Return ``preferred`` followed by the non-null values it does not list."""
        extras = [v for v in pd.unique(values) if pd.notna(v) and v not in preferred]
        return preferred + extras

    # == a. Sample points: count / max / min / mean per land-use class ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])
    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    # clean_df turned the value column into strings; convert back for the statistics.
    df_samples[field_name] = df_samples[field_name].astype(float)
    df_sample_means = df_samples.groupby(['YJDL', 'EJDL'])[field_name].agg(['count', 'max', 'min', 'mean']).reset_index()

    # == b. Mapped data: split the zone key into its two land-use levels ==
    df_dltb = df_dltb.copy()  # keep the caller's frame untouched
    df_dltb["YJDL"] = df_dltb['YJDL_EJDL'].apply(lambda x: x.split('_')[0])
    df_dltb["EJDL"] = df_dltb["YJDL_EJDL"].apply(lambda x: x.split('_')[1])
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_dltb.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)

    # == c. Merge on the union of all (YJDL, EJDL) keys ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YJDL', 'EJDL']],
        df_dltb[['YJDL', 'EJDL']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YJDL', 'EJDL'], how='left')
    df_final = pd.merge(df_final, df_dltb, on=['YJDL', 'EJDL'], how='left')
    df_final = pd.merge(df_final, target_areas_df, on=['EJDL'], how='left')

    # Order by the configured sequences. Values missing from a sequence are
    # appended instead of being turned into NaN categories, which a later
    # groupby would silently drop.
    in_yjdl_order = with_extras(yjdl_order, df_final['YJDL'])
    in_ejdl_order = with_extras(ejdl_order, df_final['EJDL'])
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=in_yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final.sort_values(['YJDL', 'EJDL'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# 写入EXCEL 表2
def write_to_excel_table2(df, output_path, prop_config:dict, soil_prop_name: str = ''):
    """Write the per-land-use mean/range statistics to a formatted Excel file.

    Each row of ``df`` becomes one worksheet row. Every level-1 class (``YJDL``)
    gets a subtotal row ('合计'), except when the last level-2 name of the
    class is '林地', '草地' or '其他'; in that case the first row of the class is
    shown with columns A and B merged. A district-wide row ('全区') closes the
    table.

    Args:
        df: DataFrame with ``YJDL``, ``EJDL``, ``mean``, ``min``, ``max``,
            ``count``, ``制图均值``, ``制图样点数`` and ``面积`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration. The first line of '项目分级'
            selects the number of decimals; the second line of '分级标准' is
            used as the unit shown in the header.
        soil_prop_name: Property code. For the codes in ``filtered_props`` the
            mapped columns of the merged single-row classes are dashed and the
            district-wide row is computed from '耕地' and '园地' only.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # Number of decimals, chosen from the property name.
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            num_fmt = '1f'
        elif split_name in fsn_props:
            num_fmt = '0f'
        else:
            num_fmt = '2f'
    else:
        num_fmt = '1f'

    # Unit = second line of '分级标准'; empty when that line does not exist.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt_number(value, spec=num_fmt):
        """Format ``value`` with ``spec``; missing values become '-'."""
        return "-" if pd.isna(value) else f"{value:.{spec}}"

    def fmt_range(low, high):
        """Format 'low~high'; '-' when either bound is missing."""
        if pd.isna(low) or pd.isna(high):
            return "-"
        return f"{low:.{num_fmt}}~{high:.{num_fmt}}"

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'
    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '范围/' + prop_unit
    ws['E2'] = '数量/个'
    ws['F2'] = '均值/' + prop_unit
    ws['G2'] = '面积/亩'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        # observed=False may yield categories without rows; they get no block.
        if group_yl_df.empty:
            continue
        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row
        last_ejdl = None
        for _, row_data in group_yl_df.iterrows():
            last_ejdl = row_data['EJDL']
            ws.cell(row=current_row, column=2).value = last_ejdl
            # Sample statistics
            sample_mean = row_data.get('mean')
            if pd.notna(sample_mean):
                ws.cell(row=current_row, column=3).value = fmt_number(sample_mean)
                ws.cell(row=current_row, column=4).value = fmt_range(row_data.get('min'), row_data.get('max'))
                ws.cell(row=current_row, column=5).value = row_data.get('count', '-')
            else:
                ws.cell(row=current_row, column=3).value = "-"
                ws.cell(row=current_row, column=4).value = "-"
                ws.cell(row=current_row, column=5).value = "-"
            # Mapped statistics
            map_mean = row_data.get('制图均值')
            if pd.notna(map_mean):
                ws.cell(row=current_row, column=6).value = fmt_number(map_mean)
                ws.cell(row=current_row, column=7).value = fmt_number(row_data.get('面积'), '0f')
            else:
                ws.cell(row=current_row, column=6).value = "-"
                ws.cell(row=current_row, column=7).value = "-"
            current_row += 1

        # Classes whose last level-2 name repeats the class name: merge A:B on
        # the first row and skip the subtotal row.
        if last_ejdl in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            if soil_prop_name in filtered_props:
                ws.cell(row=yl_start_row, column=6).value = "-"
                ws.cell(row=yl_start_row, column=7).value = "-"
            continue

        ws.cell(row=current_row, column=2).value = '合计'
        # Sample subtotal: count-weighted mean of the per-class means.
        total_count = group_yl_df['count'].sum()
        if total_count != 0:
            total_sample_mean = (group_yl_df['mean'] * group_yl_df['count']).sum() / total_count
        else:
            total_sample_mean = None
        if pd.notna(total_sample_mean):
            ws.cell(row=current_row, column=3).value = fmt_number(total_sample_mean)
            ws.cell(row=current_row, column=4).value = fmt_range(group_yl_df['min'].min(), group_yl_df['max'].max())
            ws.cell(row=current_row, column=5).value = f"{total_count:.0f}"
        else:
            ws.cell(row=current_row, column=3).value = "-"
            ws.cell(row=current_row, column=4).value = "-"
            ws.cell(row=current_row, column=5).value = "-"

        # Mapped subtotal: mean weighted by '制图样点数'. Rows lacking either
        # value are skipped.
        map_weighted_sum = 0
        map_count = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                map_weighted_sum += mean_val * count_val
                map_count += count_val
        # None (not 0) marks "no data", so a genuine zero mean is still shown.
        weighted_avg = (map_weighted_sum / map_count) if map_count > 0 else None
        total_area = group_yl_df['面积'].sum()
        if weighted_avg is not None:
            ws.cell(row=current_row, column=6).value = fmt_number(weighted_avg)
            ws.cell(row=current_row, column=7).value = f"{total_area:.0f}"
        else:
            ws.cell(row=current_row, column=6).value = "-"
            ws.cell(row=current_row, column=7).value = "-"

        # The level-1 label spans the data rows plus the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- District-wide statistics ---
    if soil_prop_name in filtered_props:
        df_for_total = df_to_write[df_to_write['YJDL'].isin(['耕地', '园地'])].copy()
        print(f"全区统计过滤仅基于耕地和园地YJDL in ['耕地', '园地']")
    else:
        df_for_total = df_to_write.copy()
    total_counts = df_for_total['count'].sum()
    if total_counts > 0:
        total_mean = (df_for_total['mean'] * df_for_total['count']).sum() / total_counts
    else:
        total_mean = None
    if not df_for_total.empty:
        total_range = fmt_range(df_for_total['min'].min(), df_for_total['max'].max())
        total_areas = df_for_total['面积'].sum()
        # Area-weighted mean over rows that have both a mapped mean and an
        # area, so rows without a mean do not inflate the denominator.
        mapped = df_for_total[df_for_total['制图均值'].notna() & df_for_total['面积'].notna()]
        mapped_area = mapped['面积'].sum()
        if mapped_area > 0:
            total_zhitu_mean = (mapped['制图均值'] * mapped['面积']).sum() / mapped_area
        else:
            total_zhitu_mean = None
    else:
        total_range = "-"
        total_zhitu_mean = None
        total_areas = 0

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = fmt_number(total_mean)
    ws.cell(row=current_row, column=4).value = total_range
    ws.cell(row=current_row, column=5).value = f"{total_counts:.0f}" if total_counts > 0 else "-"
    ws.cell(row=current_row, column=6).value = fmt_number(total_zhitu_mean)
    ws.cell(row=current_row, column=7).value = f"{total_areas:.0f}" if total_areas > 0 else "-"

    # --- d. Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        ExcelStyleUtils.set_style(ws,f'A1:{max_col_letter}{current_row}')
        ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
def main(gdb_path, soil_prop_name, dltb_features, soil_prop_tif, output_path,target_areas_df, prop_config):
    """Run zonal statistics per land-use class and write the Excel report.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Soil property name; also used in the output file name.
        dltb_features: Land-use polygons carrying a ``YJDL_EJDL`` zone field.
        soil_prop_tif: Raster of the soil property that is summarised per zone.
        output_path: Directory that receives the ``.xlsx`` report.
        target_areas_df: Target areas forwarded to ``process_data_for_table2``.
        prop_config: Property configuration forwarded to the Excel writer.

    Any exception is printed together with its traceback and not re-raised;
    temporary datasets are cleaned up in every case.
    """
    out_table_mean = r"in_memory/out_table_mean"
    # Built before the try block so the finally clause can always rely on it.
    temp_files = [out_table_mean]
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")
        # Always recompute: with overwriteOutput enabled there is nothing to gain
        # from reusing a table that an earlier run may have left behind.
        arcpy.sa.ZonalStatisticsAsTable(dltb_features, "YJDL_EJDL", soil_prop_tif, out_table_mean, "DATA", "MEAN")
        dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "MEAN", "COUNT"]))
        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config, soil_prop_name)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,446 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged cell range by the direction in which it extends.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col`` and
            ``max_col``; may be ``None`` (or any falsy value).

    Returns:
        'both' when the range covers several rows and several columns,
        'row' when it covers several rows only (vertical merge),
        'column' when it covers several columns only (horizontal merge),
        ``None`` for a falsy argument or a single-cell range.
    """
    if not merged_range:
        return None
    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col
    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    return None
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Build the per-soil-type grade table (sample counts + adjusted mapped areas).

    Args:
        soil_prop_name: Name of the sample column that holds the grade code.
        df_trlx_sample: Samples with ``YL``, ``TS`` and ``soil_prop_name`` columns.
        df_trlx_zhitu: Tabulated intersection with ``YL_TS`` ("<YL>_<TS>"),
            ``gridcode`` (any case; column names are upper-cased) and ``AREA``.
        df_trlx: Soil-type polygons with ``YL``, ``TS`` and ``Shape@Area``.
        target_areas_df: Frame whose ``面积`` column sums to the target total area.

    Returns:
        DataFrame with one row per (YL, TS, GRIDCODE) holding ``样点数``,
        ``样点数占比``, ``制图面积`` and ``面积占比`` (plus intermediate columns),
        sorted by the configured YL / TS order.

    The input frames are copied, so the caller's objects stay unchanged.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Strip ``columns`` as strings and drop rows holding a null marker in them."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    def with_extras(preferred, values):
        """Return ``preferred`` followed by the non-null values it does not list."""
        extras = [v for v in pd.unique(values) if pd.notna(v) and v not in preferred]
        return preferred + extras

    df_trlx_sample = df_trlx_sample.copy()
    df_trlx_zhitu = df_trlx_zhitu.copy()
    df_trlx = df_trlx.copy()
    key_cols = ['YL', 'TS']

    # == a. Samples: number of points per (YL, TS, grade) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample, sample_fields)
    # clean_df stringified the grade column; go through to_numeric so that
    # values such as '3.0' convert and unparsable ones ('nan') are dropped
    # instead of making astype(int) raise.
    grade = pd.to_numeric(df_samples[field_name], errors='coerce')
    df_samples = df_samples[grade.notna()].copy()
    df_samples["GRIDCODE"] = grade[grade.notna()].astype(int)
    # Parent rock / parent material are derived from TS. They are not used in
    # the returned table; kept for the currently disabled parent-material report.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)
    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped data: area per (YL, TS, grade) ==
    df_trlx_zhitu["YL"] = df_trlx_zhitu['YL_TS'].apply(lambda x: x.split('_')[0])
    df_trlx_zhitu["TS"] = df_trlx_zhitu["YL_TS"].apply(lambda x: x.split('_')[1])
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, key_cols)
    df_map_data = df_trlx_zhitu.groupby(["YL","TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu

    # == c. Polygon areas, scaled so that their total equals the target total ==
    df_trlx = clean_df(df_trlx, key_cols)
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015
    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    # Without polygon area there is nothing to scale; keep the factor neutral.
    adjusted_area_yz = target_areas / original_area if original_area > 0 else 1.0
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    df_trlx_area = df_trlx.groupby(key_cols)['面积'].sum().reset_index()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # == Adjustment coefficient per (YL, TS) ==
    # The target table is keyed by (YL, TS), so it is joined on both columns:
    # joining on TS alone duplicates rows whenever one TS name occurs under
    # several YL.
    ts_original_sum = df_map_data.groupby(key_cols)['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ts_adj = pd.merge(ts_original_sum, df_trlx_area, on=key_cols, how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target area: fall back to the original area (coefficient 1).
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) avoids dividing by zero.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # Apply the coefficient to every grade row.
    df_map_data = pd.merge(df_map_data, ts_adj[key_cols + ['平差系数']], on=key_cols, how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    # Shares are based on the adjusted areas.
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, key_cols)

    # == d. Merge on the union of all (YL, TS, GRIDCODE) keys ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YL', 'TS', 'GRIDCODE']],
        df_map_data[['YL', 'TS', 'GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS', 'GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_data, on=['YL', 'TS', 'GRIDCODE'], how='left')

    # Sort by the configured order; values missing from it are appended.
    in_yl_order = with_extras(yl_order, df_final['YL'])
    in_ts_order = with_extras(ts_order, df_final['TS'])
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(['YL', 'TS', 'GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# return df_final, df_sample_mymz
# 写入EXCEL 表2
def write_to_excel_table3(df, output_path, prop_config:dict):
    """Write the per-soil-type grade statistics to a formatted Excel file.

    Rows are grouped by ``YL`` and ``TS``; the labels of both levels are merged
    vertically over their rows, and a district-wide row ('全区') closes the
    table.

    Args:
        df: DataFrame with ``YL``, ``TS``, ``GRIDCODE``, ``样点数``,
            ``样点数占比``, ``制图面积`` and ``面积占比`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration; ``prop_config['标准等级']`` is a
            mapping that is inverted to translate ``str(GRIDCODE)`` into the
            label shown in column C ('-' when there is no match).

    Returns:
        None. Nothing is written when ``df`` is empty.

    Raises:
        KeyError: If ``prop_config`` has no '标准等级' entry.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply the font and, when given, alignment/border to each cell of the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def number_or_dash(value, scale=1, digits=None):
        """Return ``value * scale`` (rounded when ``digits`` is set) or '-' if missing."""
        # pd.isna also accepts None / object values, where np.isnan raises TypeError.
        if pd.isna(value):
            return '-'
        scaled = value * scale
        return scaled if digits is None else round(scaled, digits)

    # --- b. Header (two rows) ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # Inverted mapping: configured value -> configured key.
    level_dict = prop_config['标准等级']
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()
    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            # observed=False may yield categories that have no rows at all.
            if group_ts_df.empty:
                continue
            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row
            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = upper_ranges.get(str(row_data['GRIDCODE']), '-')
                # Sample statistics
                ws.cell(row=current_row, column=4).value = number_or_dash(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = number_or_dash(row_data['样点数占比'], 100, 2)
                # Mapped statistics
                ws.cell(row=current_row, column=6).value = number_or_dash(row_data['制图面积'], digits=0)
                ws.cell(row=current_row, column=7).value = number_or_dash(row_data['面积占比'], 100, 2)
                current_row += 1
            # Merge a label only when it spans more than one row; a 1x1 merge
            # has no visual effect.
            if current_row - 1 > ts_start_row:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts
        if current_row - 1 > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- District-wide row ---
    total_areas = df_to_write['制图面积'].sum()
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum()*100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum()*100, 2)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
                # Horizontally merged cells would distort the width of their first column.
                if get_merge_type(merged_range) == 'column':
                    continue
                # CJK characters get an extra 0.7 on top of the plain character count.
                cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', str(cell.value))) + len(str(cell.value))
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """Write the parent-material / parent-rock sample statistics to Excel.

    ``df`` is aggregated per ('母质', '母岩'): the ``TZ`` names are joined into
    one string, the counts are summed and the mean is the count-weighted mean
    of the input means. Groups whose '母质' is '未知' are not written.

    Args:
        df: DataFrame with '母质', '母岩', ``TZ``, ``mean`` and ``count`` columns.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration; the second line of '分级标准' is
            used as the unit shown in the header.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Unit = second line of '分级标准'; empty when that line does not exist.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply the font and, when given, alignment/border to each cell of the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- Header (two rows) ---
    headers = ['母岩母质','', '土种类型', '样点统计', '']
    ws.append(headers)
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')
    ws.merge_cells('C1:C2')
    ws.merge_cells('D1:E1')
    current_row = 3

    # --- Aggregate per (parent material, parent rock) ---
    stats = df.copy()
    # Weighted sums give the group mean of the underlying samples; a plain
    # mean of the per-TZ means would ignore how many samples each TZ has.
    stats['_weighted'] = stats['mean'] * stats['count']
    grouped = stats.groupby(['母质', '母岩']).agg({
        'TZ': lambda names: '、'.join(names.dropna().astype(str)),
        '_weighted': 'sum',
        'count': 'sum',
    }).reset_index()
    grouped['mean'] = grouped['_weighted'] / grouped['count'].where(grouped['count'] > 0)

    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        parent_material_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]
        # The parent-material name is shown once, on the first row of its group.
        ws.cell(row=current_row, column=1, value=parent_material)
        for _, row_data in material_group.iterrows():
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            mean_value = row_data['mean']
            ws.cell(row=current_row, column=4, value='-' if pd.isna(mean_value) else round(mean_value, 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        # Merge the label only when the group spans more than one row.
        if current_row - 1 > parent_material_row:
            ws.merge_cells(start_row=parent_material_row, start_column=1, end_row=current_row - 1, end_column=1)

    # District-wide row: only the label is written, no statistics.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- Styles and fixed column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    if current_row > 1:
        apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
        apply_style(f'A1:{max_col_letter}2', header_font)
    ws.column_dimensions["A"].width = 20
    ws.column_dimensions["B"].width = 20
    ws.column_dimensions["C"].width = 30
    ws.column_dimensions["D"].width = 20
    ws.column_dimensions["E"].width = 20

    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Join samples to soil-type polygons, tabulate grade areas and write the report.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace; also holds the
            sample feature class named ``soil_prop_name``.
        soil_prop_name: Sample feature class / value field name; also used in
            the output file name.
        trlx_features: Soil-type polygons with ``YL``, ``TS``, ``TZ`` and
            ``YL_TS`` fields.
        reclassed_feature: Reclassified polygons carrying a ``gridcode`` field.
        output_path: Directory that receives the ``.xlsx`` report.
        target_areas_df: Target areas forwarded to ``process_data_for_table3``.
        prop_config: Property configuration forwarded to the Excel writer.

    Any exception is printed together with its traceback and not re-raised;
    temporary datasets are cleaned up in every case.
    """
    temp_out_features = r"in_memory/temp_out_type_features"
    out_table_mean = r"in_memory/out_table_type_mean"
    temp_files = [temp_out_features, out_table_mean]
    try:
        output_excel_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        # Fields carried through the spatial join, per source dataset.
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for join_features, wanted_fields in fields_to_keep.items():
            for field_name in wanted_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # A field that cannot be mapped is reported and left out.
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

        # Attach the soil-type attributes to every sample point.
        arcpy.analysis.SpatialJoin(soil_prop_features, trlx_features, temp_out_features, "JOIN_ONE_TO_ONE", "KEEP_ALL",field_mappings, "INTERSECT")
        # Area of every grade inside every YL_TS zone.
        arcpy.analysis.TabulateIntersection(trlx_features, "YL_TS", reclassed_feature, out_table_mean, "gridcode", out_units="SQUARE_METERS")

        trlx_zhitu_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"]))
        trlx_sample_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))
        trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"]))

        final_dataframe = process_data_for_table3(soil_prop_name,trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,465 @@
# -*- coding: utf-8 -*-
import os
import re
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
# --- 2. 辅助函数 ---
def get_prop_level(prop_level):
    """Translate a texture grade code (or texture name) into its texture name.

    Args:
        prop_level: Grade code (5, 4, 3, 1, 2, or anything ``int()`` accepts)
            or one of the texture names themselves.

    Returns:
        The texture name, or "-" for a missing value, 0, an unknown code or a
        value that cannot be interpreted.
    """
    level_names = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}
    if pd.isna(prop_level) or prop_level == 0:
        return "-"
    # Names must be recognised before any int() conversion; int("砂质") raises.
    if isinstance(prop_level, str):
        name = prop_level.strip()
        if name in level_names.values():
            return name
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return level_names.get(code, "-")
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged-cell range by the direction(s) it spans.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col``, or a falsy value for "not merged".

    Returns:
        'both' when the range spans several rows and several columns,
        'row' when it only spans several rows (vertical merge),
        'column' when it only spans several columns (horizontal merge),
        None for a falsy argument or a range covering a single cell.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    if spans_rows:
        return 'both' if spans_cols else 'row'
    return 'column' if spans_cols else None
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Assemble per-soil-type texture statistics from sample points and mapped areas.

    Args:
        soil_prop_name: Name of the property column in ``df_trlx_sample``.
        df_trlx_sample: Sample points with columns YL, TS and ``soil_prop_name``.
        df_trlx_zhitu: Tabulated intersection with columns YL_TS, gridcode and
            AREA (column names are upper-cased here).
        df_trlx: Soil-type polygons with columns YL, TS and ``Shape@Area``.
        target_areas_df: Table whose ``面积`` column sums to the target total area.

    Returns:
        DataFrame with one row per (YL, TS, GRIDCODE) holding the sample count
        and share (样点数, 样点数占比) and the adjusted mapped area and share
        (制图面积, 面积占比), sorted by the custom YL / TS order.

    Note:
        The three input frames are cleaned in place.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Strip the key columns, map placeholder strings to NaN, drop rows missing a key."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count samples per (YL, TS, texture class) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample, sample_fields)
    df_samples["GRIDCODE"] = df_samples[field_name]
    # Parent rock / parent material derived from the soil genus; not used by the
    # returned table, kept so the sample frame carries the same columns as before.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)
    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped data: area of every texture class inside every soil type ==
    df_trlx_zhitu["YL"] = df_trlx_zhitu['YL_TS'].apply(lambda x: x.split('_')[0])
    df_trlx_zhitu["TS"] = df_trlx_zhitu["YL_TS"].apply(lambda x: x.split('_')[1])
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, ['YL', 'TS'])
    df_trlx_zhitu['GRIDCODE'] = df_trlx_zhitu['GRIDCODE'].apply(get_prop_level)
    df_map_data = df_trlx_zhitu.groupby(["YL", "TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # area unit after conversion: 亩

    # == c. Soil-type polygon areas, scaled so their total equals the target total ==
    df_trlx = clean_df(df_trlx, ['YL', 'TS'])
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015
    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    # No polygon area at all: nothing to scale, so avoid dividing by zero.
    adjusted_area_yz = target_areas / original_area if original_area else 1.0
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    df_trlx_area = df_trlx.groupby(['YL', 'TS'])['面积'].sum().reset_index()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # == d. Per-TS adjustment coefficient = target area / mapped area ==
    ts_original_sum = df_map_data.groupby('TS')['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    # Collapse the targets to one row per TS before merging: df_trlx_area is keyed
    # by (YL, TS), so a TS found under several YL would otherwise duplicate rows
    # in the coefficient table and, through the merge below, in df_map_data.
    ts_target_sum = df_trlx_area.groupby('TS', as_index=False)['面积'].sum()
    ts_adj = pd.merge(ts_original_sum, ts_target_sum, on='TS', how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # TS without a target keeps its mapped area (coefficient 1).
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) keeps the division defined for TS with no mapped area.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # Apply the coefficient to every texture class of the TS.
    df_map_data = pd.merge(df_map_data, ts_adj[['TS', '平差系数']], on='TS', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    # Shares are computed on the adjusted areas.
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YL', 'TS'])

    # == e. Merge sample and mapped statistics on a common key skeleton ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['YL', 'TS', 'GRIDCODE']],
        df_map_data[['YL', 'TS', 'GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)
    df_final = pd.merge(df_skeleton, df_sample_means, on=['YL', 'TS', 'GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_data, on=['YL', 'TS', 'GRIDCODE'], how='left')

    # Sort by the configured YL / TS order; values not in the configuration go last.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(['YL', 'TS', 'GRIDCODE'], inplace=True)
    print("数据处理流程完成!")
    return df_final
# 写入EXCEL 表2
def write_to_excel_table3(df, output_path, prop_config:dict):
    """Write the soil-type texture statistics to a formatted Excel workbook.

    Layout: a two-row header, one row per (sub-class, soil genus, texture
    class) with the sub-class (column A) and soil genus (column B) cells merged
    vertically, and a closing '全区' totals row.

    Args:
        df: Statistics table; the columns YL, TS, GRIDCODE, 样点数, 样点数占比,
            制图面积 and 面积占比 are read.
        output_path: Destination ``.xlsx`` path.
        prop_config: Property configuration. Kept for interface compatibility;
            nothing is read from it any more (the previous lookup of
            '标准等级' was unused and could raise KeyError).

    Returns:
        None. Prints a warning and writes nothing when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell of the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def dash_if_nan(value, transform=None):
        """Return '-' for NaN, otherwise the (optionally transformed) value."""
        if np.isnan(value):
            return '-'
        return transform(value) if transform else value

    def as_percent(ratio):
        return round(ratio * 100, 2)

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()  # work on a copy so the caller's frame is untouched
    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        # observed=False yields empty groups for unused categories; skip them.
        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            if group_ts_df.empty:
                continue
            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row
            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = row_data['GRIDCODE']
                # Sample statistics
                ws.cell(row=current_row, column=4).value = dash_if_nan(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = dash_if_nan(row_data['样点数占比'], as_percent)
                # Mapped statistics
                ws.cell(row=current_row, column=6).value = dash_if_nan(row_data['制图面积'], lambda v: round(v, 0))
                ws.cell(row=current_row, column=7).value = dash_if_nan(row_data['面积占比'], as_percent)
                current_row += 1
            # Merge the soil-genus cell only when it really spans several rows.
            if current_row - 1 > ts_start_row:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts
        # Same for the sub-class cell.
        if current_row - 1 > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # Totals row for the whole area
    total_areas = df_to_write['制图面积'].sum()
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum() * 100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum() * 100, 2)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    han_char = re.compile('([\u4e00-\u9fa5])')  # CJK characters are wider than ASCII
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            # Horizontally merged cells would over-widen their first column.
            if get_merge_type(merged) == 'column':
                continue
            text = str(cell.value)
            cell_len = 0.7 * len(han_char.findall(text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """Write the parent-material / parent-rock sample statistics to Excel.

    Args:
        df: Per (母质, 母岩, TZ) statistics with the columns 母质, 母岩, TZ,
            mean and count.
        output_path: Destination ``.xlsx`` path.
        prop_config: Property configuration; the second line of its '分级标准'
            entry is used as the unit shown in the header.

    Returns:
        None. Prints a warning and writes nothing when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Unit = second line of '分级标准'; empty when the entry is missing or has one line.
    unit_lines = (prop_config.get('分级标准') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell of the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')  # parent material / parent rock
    ws.merge_cells('C1:C2')  # soil species
    ws.merge_cells('D1:E1')  # sample statistics
    current_row = 3

    # --- c. One row per (parent material, parent rock) ---
    grouped = df.groupby(['母质', '母岩']).agg({
        # Comma-separated soil species; previously joined with '' so the names
        # ran together. Missing names are skipped instead of raising.
        'TZ': lambda names: ','.join(names.dropna().astype(str)),
        # NOTE(review): unweighted mean of the per-species means — confirm
        # whether a count-weighted mean is wanted here.
        'mean': 'mean',
        'count': 'sum',
    }).reset_index()
    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        parent_material_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]
        for position, (_, row_data) in enumerate(material_group.iterrows()):
            # The parent-material name is shown on the first row of its group only.
            ws.cell(row=current_row, column=1, value=parent_material if position == 0 else '')
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        if parent_material_row < current_row:
            ws.merge_cells(start_row=parent_material_row, start_column=1, end_row=current_row - 1, end_column=1)

    # Whole-area row (label only in this version)
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)
    for letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[letter].width = width

    # --- e. Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Produce the per-soil-type statistics workbook for one soil property.

    Spatially joins the sample points to the soil-type polygons, tabulates the
    intersection of the soil types with the reclassified features, and hands
    the resulting tables to ``process_data_for_table3`` and
    ``write_to_excel_table3``.

    Any exception is printed with its traceback and swallowed; the temporary
    in_memory datasets are cleaned up in every case.
    """
    try:
        temp_files = []
        # Output workbook path
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path, soil_prop_name)

        # Geoprocessing environment
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend((temp_out_features, out_table_mean))

        # Fields carried through the spatial join, per input dataset
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for join_features, wanted_fields in fields_to_keep.items():
            for field_name in wanted_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"  # same merge rule for every joined field
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

        # Sample points -> soil-type polygons
        arcpy.analysis.SpatialJoin(
            soil_prop_features, trlx_features, temp_out_features,
            "JOIN_ONE_TO_ONE", "KEEP_ALL", field_mappings, "INTERSECT",
        )
        # Area of every gridcode inside every YL_TS zone
        arcpy.analysis.TabulateIntersection(
            trlx_features, "YL_TS", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS",
        )

        trlx_zhitu_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"]))
        trlx_sample_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))
        # Soil-type polygon areas
        trlx_area_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"]))

        final_dataframe = process_data_for_table3(
            soil_prop_name, trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

View File

@@ -0,0 +1,512 @@
# -*- coding: utf-8 -*-
import os
import arcpy
import pandas as pd
import numpy as np
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
from tools.config.custom_sort import yl_order, ts_order
from tools.core.utils.os_utils import temp_files_processor
from tools.core.utils.excel_utils import ExcelStyleUtils
# --- 3. 数据处理与分析 均值---
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Combine sample statistics, zonal means and adjusted areas per soil type.

    Args:
        soil_prop_name: Name of the numeric property column in ``df_trlx_sample``.
        df_trlx_sample: Sample points with columns YL, TS, TZ and ``soil_prop_name``.
        df_trlx_zhitu: Zonal statistics with columns YL_TS, MEAN and COUNT.
        df_trlx: Polygons with columns YL, TS and ``Shape@Area``.
        target_areas_df: Target areas with columns EJDL and 面积.

    Returns:
        Tuple ``(df_final, df_sample_mymz)``: the per (YL, TS) table sorted by
        the custom order, and the sample statistics per (母质, 母岩, TZ).

    Note:
        The three input frames are cleaned in place.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Strip key columns, map placeholder strings to NaN, drop rows missing a key."""
        for key in columns:
            df[key] = df[key].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    soil_keys = ['YL', 'TS']

    # == a. Sample statistics ==
    print("--> 步骤1: 计算样点均值...")
    samples = clean_df(df_trlx_sample, ['YL', 'TS', soil_prop_name])
    samples[soil_prop_name] = samples[soil_prop_name].astype(float)
    # Parent rock and parent material are derived from the soil genus.
    samples['母岩'] = samples['TS'].apply(calculate_muyan)
    samples['母质'] = samples['母岩'].apply(calculate_muzhi)
    per_soil_type = samples.groupby(soil_keys)[soil_prop_name]
    df_sample_means = per_soil_type.agg(['count', 'max', 'min', 'mean', 'median']).reset_index()
    per_parent = samples.groupby(['母质', '母岩', 'TZ'])[soil_prop_name]
    df_sample_mymz = per_parent.agg(['count', 'mean', 'median']).reset_index()

    # == b. Zonal (mapped) statistics keyed by the "YL_TS" zone name ==
    zone_parts = df_trlx_zhitu['YL_TS'].apply(lambda zone: zone.split('_'))
    df_trlx_zhitu["YL"] = zone_parts.apply(lambda parts: parts[0])
    df_trlx_zhitu["TS"] = zone_parts.apply(lambda parts: parts[1])
    df_trlx_zhitu = clean_df(df_trlx_zhitu, soil_keys)
    df_trlx_zhitu.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)

    # == c. Polygon areas scaled to the target total ==
    df_trlx = clean_df(df_trlx, soil_keys)
    df_trlx["面积_亩"] = df_trlx["Shape@Area"] * 0.0015
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    print(target_areas_df)
    if soil_prop_name == "GZCHD":
        is_cropland = target_areas_df['EJDL'] == '耕地'
        target_areas = target_areas_df.loc[is_cropland, '面积'].values[0]
    elif soil_prop_name in filtered_props:
        is_crop_or_garden = target_areas_df['EJDL'].isin(['耕地', '园地'])
        target_areas = target_areas_df.loc[is_crop_or_garden, '面积'].sum()
    else:
        target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积_亩'].sum()
    adjusted_area_yz = target_areas / original_area
    df_trlx["面积_亩"] = df_trlx["面积_亩"] * adjusted_area_yz
    df_trlx_area = df_trlx.groupby(soil_keys)['面积_亩'].sum().reset_index()

    # == d. Merge everything on the union of (YL, TS) keys ==
    print("--> 步骤3: 合并数据...")
    key_frames = [df_sample_means[soil_keys], df_trlx_zhitu[soil_keys]]
    df_final = pd.concat(key_frames).drop_duplicates().reset_index(drop=True)
    for right in (df_sample_means, df_trlx_zhitu, df_trlx_area):
        df_final = pd.merge(df_final, right, on=soil_keys, how='left')

    # Configured order first, unknown names appended afterwards.
    in_yl_order = yl_order + [name for name in df_final['YL'].unique() if name not in yl_order]
    in_ts_order = ts_order + [name for name in df_final['TS'].unique() if name not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final.sort_values(soil_keys, inplace=True)
    print("数据处理流程完成!")
    return df_final, df_sample_mymz
# 写入EXCEL 表2
def write_to_excel_table3(df, output_path, prop_config:dict, stats):
    """Write the per-soil-type property statistics to a formatted Excel workbook.

    Each sub-class (YL) gets one row per soil genus plus a '合计' subtotal row;
    a final '全区' row summarises the whole area.

    Args:
        df: Statistics table; the columns YL, TS, count, max, min, mean,
            median, 制图均值, 制图样点数 and 面积_亩 are read.
        output_path: Destination ``.xlsx`` path.
        prop_config: Property configuration; the first line of '项目分级'
            selects the number of decimals, the second line of '分级标准' is
            the unit shown in the header.
        stats: Whole-area sample statistics with the keys 'mean', 'median'
            and 'count'.

    Returns:
        None. Prints a warning and writes nothing when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # Decimal format per property family
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'
    # Unit = second line of '分级标准'; empty when missing or single-line.
    unit_lines = (prop_config.get('分级标准') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt(value):
        """Format a number with the decimals chosen for this property."""
        return f"{value:.{prop_name}}"

    def fmt_range(low, high):
        """Format 'min~max'; without a separator the two numbers ran together."""
        return f"{fmt(low)}~{fmt(high)}"

    def write_cells(row, first_col, values):
        for offset, value in enumerate(values):
            ws.cell(row=row, column=first_col + offset).value = value

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:F1')
    ws['C1'] = '样点统计'
    ws.merge_cells('G1:H1')
    ws['G1'] = '制图统计'
    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '中位值/' + prop_unit
    ws['E2'] = '范围/' + prop_unit
    ws['F2'] = '数量/个'
    ws['G2'] = '均值/' + prop_unit
    ws['H2'] = '面积/亩'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()  # work on a copy so the caller's frame is untouched
    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['TS']
            # Sample statistics
            sample_mean = row_data.get('mean')
            if pd.notna(sample_mean):
                write_cells(current_row, 3, [
                    fmt(sample_mean),
                    fmt(row_data['median']),
                    fmt_range(row_data['min'], row_data['max']),
                    row_data.get('count', '-'),
                ])
            else:
                write_cells(current_row, 3, ["-"] * 4)
            # Mapped statistics
            map_mean = row_data.get('制图均值')
            area = row_data.get('面积_亩')
            if pd.notna(map_mean):
                write_cells(current_row, 7, [fmt(map_mean), f"{area:.0f}" if pd.notna(area) else "-"])
            else:
                write_cells(current_row, 7, ["-", "-"])
            current_row += 1

        # Groups whose last row is one of these names get no subtotal row; the
        # name is shown across columns A:B on the group's first row instead.
        if ws.cell(row=current_row-1, column=2).value in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            continue

        ws.cell(row=current_row, column=2).value = '合计'
        # Subtotal of the sample statistics: count-weighted mean of the means.
        total_count = group_yl_df['count'].sum()
        weighted_sum = group_yl_df['mean'] * group_yl_df['count']
        if not weighted_sum.empty and total_count != 0:
            total_sample_mean = weighted_sum.sum() / total_count
        else:
            total_sample_mean = None
        total_median = group_yl_df['median'].mean()
        min_min, max_max = group_yl_df['min'].min(), group_yl_df['max'].max()
        if pd.notna(total_sample_mean):
            write_cells(current_row, 3, [
                fmt(total_sample_mean),
                fmt(total_median),
                fmt_range(min_min, max_max),
                f"{total_count:.0f}",
            ])
        else:
            write_cells(current_row, 3, ["-"] * 4)

        # Subtotal of the mapped mean: weighted by the zonal cell count, using
        # only rows where both the mean and a positive count are present.
        map_means = group_yl_df['制图均值']
        map_counts = group_yl_df['制图样点数']
        usable = map_means.notna() & map_counts.notna() & (map_counts > 0)
        usable_count = map_counts[usable].sum()
        weighted_avg = (map_means[usable] * map_counts[usable]).sum() / usable_count if usable_count > 0 else 0
        total_area = group_yl_df['面积_亩'].sum()
        if weighted_avg > 0:
            write_cells(current_row, 7, [fmt(weighted_avg), f"{total_area:.0f}"])
        else:
            write_cells(current_row, 7, ["-", "-"])

        # Sub-class cell spans its rows including the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Whole-area row ---
    total_range = fmt_range(df_to_write['min'].min(), df_to_write['max'].max())
    total_areas = df_to_write['面积_亩'].sum()
    # Area-weighted mapped mean. Only rows that have both a mean and an area
    # take part; dividing by the total area let rows without a mapped mean
    # drag the result down.
    mapped = df_to_write.dropna(subset=['制图均值', '面积_亩'])
    mapped_area = mapped['面积_亩'].sum()
    if mapped_area:
        total_zhitu_mean_text = fmt((mapped['制图均值'] * mapped['面积_亩']).sum() / mapped_area)
    else:
        total_zhitu_mean_text = "-"
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    write_cells(current_row, 3, [
        fmt(stats['mean']),
        fmt(stats['median']),
        total_range,
        f"{stats['count']:.0f}",
        total_zhitu_mean_text,
        f"{total_areas:.0f}",
    ])

    # --- d. Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
# 母岩母质表
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config, stats):
    """Write the parent-material / parent-rock sample statistics to Excel.

    Args:
        df: Per (母质, 母岩, TZ) statistics with the columns 母质, 母岩, TZ,
            mean and count.
        output_path: Destination ``.xlsx`` path.
        prop_config: Property configuration; the first line of '项目分级'
            selects the decimals of the '全区' row, the second line of
            '分级标准' is the unit shown in the header.
        stats: Whole-area sample statistics with the keys 'mean' and 'count'.

    Returns:
        None. Prints a warning and writes nothing when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Decimal format per property family
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'
    # Unit = second line of '分级标准'; empty when missing or single-line.
    unit_lines = (prop_config.get('分级标准') or '').split('\n')
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- Header ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')  # parent material / parent rock
    ws.merge_cells('C1:C2')  # soil species
    ws.merge_cells('D1:E1')  # sample statistics
    current_row = 3

    # --- One row per (parent material, parent rock) ---
    grouped = df.groupby(['母质', '母岩']).agg({
        # Comma-separated soil species; previously joined with '' so the names
        # ran together. Missing names are skipped instead of raising.
        'TZ': lambda names: ','.join(names.dropna().astype(str)),
        # NOTE(review): unweighted mean of the per-species means — confirm
        # whether a count-weighted mean is wanted here.
        'mean': 'mean',
        'count': 'sum',
    }).reset_index()
    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue
        parent_material_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]
        for position, (_, row_data) in enumerate(material_group.iterrows()):
            # The parent-material name is shown on the first row of its group only.
            ws.cell(row=current_row, column=1, value=parent_material if position == 0 else '')
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1
        if parent_material_row < current_row:
            ws.merge_cells(start_row=parent_material_row, start_column=1, end_row=current_row - 1, end_column=1)

    # --- Whole-area row, taken from the overall sample statistics ---
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')
    ws.cell(row=current_row, column=4, value=f"{stats['mean']:.{prop_name}}")
    ws.cell(row=current_row, column=5, value=f"{stats['count']:.0f}")

    # --- Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)
    for letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[letter].width = width

    # --- Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
def main(gdb_path, soil_prop_name, trlx_features, soil_prop_tif, output_path,target_areas_df, prop_config, dltb_features):
    """Produce the soil-type and parent-material workbooks for one soil property.

    Spatially joins the sample points to the soil-type polygons and computes
    the zonal mean of ``soil_prop_tif`` per "YL_TS" zone. For "GZCHD" the zones
    are first restricted to land-use polygons with DLBM starting with '01';
    for the properties in ``filtered_props`` to DLBM starting with '01' or
    '02'. The results go to ``write_to_excel_table3`` and
    ``write_to_excel_table4``.

    Any exception is printed with its traceback and swallowed; every temporary
    in_memory dataset is cleaned up in every case.
    """
    temp_files = []
    try:
        output_excel_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        output_excel4_path = os.path.join(output_path,f"{soil_prop_name}不同母岩母质土壤属性.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)

        # Geoprocessing environment
        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True
        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # Fields carried through the spatial join, per input dataset
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }
        field_mappings = arcpy.FieldMappings()
        for join_features, wanted_fields in fields_to_keep.items():
            for field_name in wanted_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"  # same merge rule for every joined field
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

        # Properties whose mapped statistics only cover DLBM '01%' and '02%'
        filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']

        # Sample points -> soil-type polygons
        arcpy.analysis.SpatialJoin(soil_prop_features, trlx_features, temp_out_features, "JOIN_ONE_TO_ONE", "KEEP_ALL",field_mappings, "INTERSECT")

        # (intersect output, filtered output, where clause) when the zones must
        # be restricted by land use; None means the soil-type polygons are used as is.
        land_use_filter = None
        if soil_prop_name == "GZCHD":
            land_use_filter = (
                r"in_memory/temp_gdtb_trlx",
                r"in_memory/temp_gdtb_trlx_out",
                "DLBM LIKE '01%'",
            )
        elif soil_prop_name in filtered_props:
            land_use_filter = (
                r"in_memory/temp_gdtb_trlx_filtered",
                r"in_memory/temp_gdtb_trlx_out_filtered",
                "DLBM LIKE '01%' OR DLBM LIKE '02%'",
            )

        zone_features = trlx_features
        if land_use_filter is not None:
            intersected, zone_features, where_clause = land_use_filter
            # Register BOTH datasets for cleanup; the filtered output of the
            # GZCHD branch used to be left behind in the in_memory workspace.
            temp_files.extend([intersected, zone_features])
            arcpy.analysis.Intersect([trlx_features, dltb_features], intersected, 'NO_FID')
            arcpy.conversion.ExportFeatures(intersected, zone_features, where_clause)

        # Zonal mean of the property raster per soil type, and the zone areas
        arcpy.sa.ZonalStatisticsAsTable(zone_features, "YL_TS", soil_prop_tif, out_table_mean, "DATA", "MEAN")
        trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(zone_features, ["YL", "TS", "Shape@Area"]))
        if land_use_filter is not None and soil_prop_name != "GZCHD":
            print("过滤制图数据仅统计耕地和园地DLBM LIKE '01%' OR '02%'")

        trlx_zhitu_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "MEAN", "COUNT"]))
        trlx_sample_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))
        # Whole-area sample statistics, taken before the per-type cleaning
        sample_values = trlx_sample_df[soil_prop_name]
        stat_sample = {
            'min': sample_values.min(),
            'max': sample_values.max(),
            'mean': sample_values.mean(),
            'median': sample_values.median(),
            'count': sample_values.count()
        }

        final_dataframe, df_mymz = process_data_for_table3(soil_prop_name,trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)
        # Soil-type workbook
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config, stat_sample)
        # Parent-material workbook
        write_to_excel_table4(df_mymz, output_excel4_path, prop_config,stat_sample)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
# --- 4. 主程序入口 ---
# if __name__ == "__main__":
# main()

File diff suppressed because it is too large Load Diff

View File

View File

@@ -0,0 +1,660 @@
# -*- coding: utf-8 -*-
"""
输入重分类后栅格转面要素类、乡镇边界面要素类、地类图斑要素类;
按一级地类统计土壤属性面积 和 按乡镇统计土壤属性面积;
将统计结果导出为Excel表格
将Excel表格转换为jpg图片
"""
import json
import os
from pathlib import Path
import sys
import traceback
import uuid
import arcpy
import argparse
import numpy as np
import pandas as pd
from openpyxl.styles import Font, Alignment, Border, Side, numbers
from openpyxl.utils import get_column_letter
sys.path.append(str(Path(__file__).parent))
from tools.core.utils.os_utils import temp_files_processor
from tools.config.common_config import guangxi_region, yunnan_region
from utils import common_utils, 平差工具
def parse_arguments():
"""Parse the command-line arguments and load the area-statistics settings.

Reads ``--reclassed_polygon`` and ``--settings_path`` (both required), loads
the JSON settings file and returns its "area_stat_settings" entry, extended
with "output_settings", "reclassed_polygon" and "soil_property".
"""
parser = argparse.ArgumentParser(description="将ArcGIS表格转换为Excel")
parser.add_argument("--reclassed_polygon", required=True, help="重分类栅格的面要素")
parser.add_argument("--settings_path", required=True, help="配置文件路径")
args = parser.parse_args()
if args.settings_path:
with open(args.settings_path, 'r', encoding="utf-8") as settings_file:
settings = json.load(settings_file)
area_stat_settings = settings.get("area_stat_settings", {})
if area_stat_settings:
# The standards file path comes from the settings themselves.
standards_dict_path = area_stat_settings.get("config_file_path", "")
with open(standards_dict_path, 'r', encoding="utf-8") as standards_file:
standards_dict = json.load(standards_file)
# The export configuration is selected by a key derived from the polygon file stem.
config_key = common_utils.get_config_key(Path(args.reclassed_polygon).stem)
output_settings = standards_dict['export_config'][config_key]
area_stat_settings["output_settings"] = output_settings
area_stat_settings["reclassed_polygon"] = args.reclassed_polygon
area_stat_settings["soil_property"] = config_key
# NOTE(review): indentation is lost in this view, so it is unclear whether this
# `else` pairs with `if args.settings_path` or with `if area_stat_settings` — confirm.
else:
print_status("错误: 未找到有效配置文件")
sys.exit(1)
return area_stat_settings
def print_status(message):
    """Emit one progress line on stdout for the GUI to display live.

    The line has the form ``STATUS:<message>`` (no space after the colon) and
    stdout is flushed right away so the reader sees it immediately.
    """
    status_line = "STATUS:{}".format(message)
    print(status_line)
    sys.stdout.flush()
def print_result(success, output_path="", error_message=""):
    """Emit the final result line on stdout for the GUI to parse.

    Success: ``RESULT:True|<output_path>|``
    Failure: ``RESULT:False||<error_message>``

    Line feeds in the error message become spaces and carriage returns are
    dropped, so the result always occupies a single line.  stdout is flushed
    afterwards.
    """
    if not success:
        single_line_error = error_message.replace('\n', ' ').replace('\r', '')
        result_line = f"RESULT:False||{single_line_error}"
    else:
        result_line = f"RESULT:True|{output_path}|"
    print(result_line)
    sys.stdout.flush()
def log_arcpy_message(message):
    """Forward one ArcPy geoprocessing message to the status channel.

    ``message.type`` values 'Message', 'Warning' and 'Error' are reported via
    print_status with a matching prefix; errors are additionally written to
    stderr as ``ArcPyError:<text>``.  Any other type is ignored.
    """
    labels = (
        ('Message', "ArcPy消息"),
        ('Warning', "ArcPy警告"),
        ('Error', "ArcPy错误"),
    )
    for kind, label in labels:
        if message.type != kind:
            continue
        text = message.message
        print_status(f"{label}: {text}")
        if kind == 'Error':
            # Errors are mirrored on stderr as well.
            sys.stderr.write(f"ArcPyError:{text}\n")
            sys.stderr.flush()
        break
def get_specail_map(original_dict):
    """Build a code -> range-label map for grades that own several ranges.

    Each value of ``original_dict`` is a string of range labels separated by
    commas and/or newlines.  For the grade at 1-based position ``p`` (in dict
    order) its k-th label (k = 0, 1, ...) is stored under the key
    ``str(p + k * len(original_dict))``.

    Returns:
        dict mapping those string codes to the stripped range labels.
    """
    grade_count = len(original_dict)
    code_to_label = {}
    for position, range_text in enumerate(original_dict.values(), start=1):
        stripped = (piece.strip() for piece in range_text.replace('\n', ',').split(','))
        labels = [piece for piece in stripped if piece]
        for repeat, label in enumerate(labels):
            code_to_label[str(position + repeat * grade_count)] = label
    return code_to_label
class SoilQualityReporter:
    """Write graded soil-property area statistics to an Excel workbook.

    Two tables are placed one below the other on a single worksheet named
    "综合统计表": areas per township (restricted to rows whose YJDLBM is
    "01") and areas per land-use code.  The columns are the grades listed in
    ``grade_map`` followed by a "总计" (total) column, and each table ends
    with a "总计" row.
    """

    def __init__(self, output_path, grade_map: dict, landuse_map, output_settings):
        """Store the report configuration.

        Args:
            output_path: Path of the Excel file to write.
            grade_map: Maps GRIDCODE values (as strings) to column captions.
                Its key order defines the column order.
            landuse_map: Maps land-use codes to the row captions of the
                land-use table.
            output_settings: Export configuration.  The first line of
                '项目分级' is used as the property name; the second line of
                '分级标准' is appended to the grading header.
        """
        self.output_path = output_path
        self.grade_map = grade_map
        self.landuse_map = landuse_map
        self.all_grades = [int(key) for key in self.grade_map.keys()]
        self.xiangmu_name = output_settings['项目分级'].split('\n')[0]
        self.xiangmu_jibie = self.xiangmu_name + "分级" + output_settings['分级标准'].split('\n')[1]
        # Set for real by generate_report(); defined here so the writer
        # methods can also be called directly without an AttributeError.
        self.is_yunnan = False

    def prepare_data(self, stats):
        """Build the township table and the land-use table.

        Args:
            stats: DataFrame with XZQMC (township name), GRIDCODE (grade),
                adjusted_area, and either YJDLBM or YNDLBM (land-use code).

        Returns:
            (df_town, df_landuse)
        """
        df_town = self._create_town_table(stats)
        df_landuse = self._create_landuse_table(stats)
        return df_town, df_landuse

    def _pivot_by_grade(self, df_stats, index_field):
        """Sum adjusted_area per (index_field, GRIDCODE) and add the totals."""
        table = df_stats.pivot_table(
            index=index_field,
            columns="GRIDCODE",
            values="adjusted_area",
            aggfunc="sum",
            fill_value=0,
            observed=False
        )
        # Grades without any data still get a (zero) column.
        for grade in self.all_grades:
            if grade not in table.columns:
                table[grade] = 0
        # Order the columns as configured, then add the total column and row.
        table = table[self.all_grades]
        table["总计"] = table.sum(axis=1)
        table.loc["总计"] = table.sum(axis=0)
        return table

    def _caption_columns(self, table):
        """Replace grade codes in the column header by their captions."""
        table.columns = [self.grade_map.get(str(col), str(col)) for col in table.columns]
        return table

    def _create_town_table(self, stats: pd.DataFrame):
        """Build the per-township table (only rows with YJDLBM == "01")."""
        # Work on a copy: a YJDLBM column may be added below.
        df_stats = stats.copy()
        if "YJDLBM" not in df_stats.columns:
            # Derive the first-level code from the first two characters of YNDLBM.
            df_stats["YJDLBM"] = df_stats["YNDLBM"].str[:2]
        # GRIDCODE is pivoted as-is; an earlier grade-grouping step is disabled.
        table = self._pivot_by_grade(df_stats[df_stats["YJDLBM"] == "01"], "XZQMC")
        return self._caption_columns(table)

    def _create_landuse_table(self, stats):
        """Build the per-land-use table (indexed by YJDLBM, else YNDLBM)."""
        index_field = "YJDLBM" if "YJDLBM" in stats.columns else "YNDLBM"
        table = self._pivot_by_grade(stats, index_field)
        # Show land-use names instead of codes where a name is configured.
        table = table.rename(index=self.landuse_map)
        return self._caption_columns(table)

    def generate_report(self, stats):
        """Build both tables from ``stats`` and write the workbook."""
        # The presence of a YNDLBM column selects the two-level land-use header.
        self.is_yunnan = 'YNDLBM' in stats.columns
        df_town, df_landuse = self.prepare_data(stats)
        self._export_to_excel(df_town, df_landuse)

    def _export_to_excel(self, df_town, df_landuse):
        """Write both tables to one sheet of the workbook at output_path."""
        with pd.ExcelWriter(self.output_path, engine='openpyxl') as writer:
            workbook = writer.book
            sheet = workbook.create_sheet("综合统计表")
            self._write_town_table(sheet, df_town)
            # Town data rows start at row 5, so the land-use table starts
            # on the row right after the last town row.
            start_row_landuse = len(df_town) + 5
            self._write_landuse_table(sheet, df_landuse, start_row_landuse)
            self._apply_common_format(sheet, start_row_landuse)
            # Drop the default empty worksheet if the workbook has one.
            if 'Sheet' in workbook.sheetnames:
                workbook.remove(workbook['Sheet'])

    def _write_town_table(self, sheet, df):
        """Write the township table starting at row 1 of ``sheet``."""
        # Columns A:B hold the row caption, so data columns start at C and
        # the total column is at index len(df.columns) + 2.
        last_col = len(df.columns) + 2
        last_col_letter = get_column_letter(last_col)
        second_last_col_letter = get_column_letter(last_col - 1)
        # Title row.
        sheet.merge_cells(f"A1:{last_col_letter}1")
        sheet["A1"] = f"各乡镇耕地土壤{self.xiangmu_name}分级面积统计表"
        sheet["A1"].font = Font(size=18)
        sheet["A1"].alignment = Alignment(horizontal='center')
        sheet.row_dimensions[1].height = 34
        # Unit row.
        sheet[f"{last_col_letter}2"] = "单位:亩"
        sheet[f"{last_col_letter}2"].font = Font(size=14)
        sheet[f"{last_col_letter}2"].alignment = Alignment(horizontal='center')
        # Column headers (rows 3-4).
        sheet.merge_cells("A3:B4")
        sheet["A3"] = "乡镇"
        sheet.merge_cells(f"{last_col_letter}3:{last_col_letter}4")
        sheet[f"{last_col_letter}3"] = "总计"
        sheet.merge_cells(f"C3:{second_last_col_letter}3")
        sheet["C3"] = self.xiangmu_jibie
        sheet.row_dimensions[3].height = 25
        # Grade captions (every column except the total).
        for col_num, col_name in enumerate(df.columns[:-1], start=2):
            sheet.cell(row=4, column=col_num+1, value=col_name)
        # Data rows.
        for r_idx, (index, row) in enumerate(df.iterrows(), start=5):
            sheet.merge_cells(f"A{r_idx}:B{r_idx}")
            sheet.cell(row=r_idx, column=1, value=index)
            for c_idx, value in enumerate(row, start=2):
                sheet.cell(row=r_idx, column=c_idx+1, value=value)

    def _write_landuse_table(self, sheet, df, start_row):
        """Write the land-use table with its title at ``start_row``."""
        last_col = len(df.columns) + 2
        last_col_letter = get_column_letter(last_col)
        second_last_col_letter = get_column_letter(last_col - 1)
        # Title row.
        sheet.merge_cells(f"A{start_row}:{last_col_letter}{start_row}")
        sheet[f"A{start_row}"] = f"各地类土壤{self.xiangmu_name}分级面积统计表"
        sheet.row_dimensions[start_row].height = 34
        # Unit row.
        sheet[f"{last_col_letter}{start_row+1}"] = "单位:亩"
        # Column headers: two-level caption when is_yunnan, merged block otherwise.
        if self.is_yunnan:
            sheet.merge_cells(f"A{start_row+2}:B{start_row+2}")
            sheet[f"A{start_row+2}"] = "土地利用类型"
            sheet[f"A{start_row+3}"] = "一级"
            sheet[f"B{start_row+3}"] = "二级"
        else:
            sheet.merge_cells(f"A{start_row+2}:B{start_row+3}")
            sheet[f"A{start_row+2}"] = "土地利用\n类型"
        sheet.merge_cells(f"{last_col_letter}{start_row+2}:{last_col_letter}{start_row+3}")
        sheet[f"{last_col_letter}{start_row+2}"] = "总计"
        sheet.merge_cells(f"C{start_row+2}:{second_last_col_letter}{start_row+2}")
        sheet[f"C{start_row+2}"] = self.xiangmu_jibie
        sheet.row_dimensions[start_row+2].height = 25
        # Grade captions (every column except the total).
        for col_num, col_name in enumerate(df.columns[:-1], start=2):
            sheet.cell(row=start_row+3, column=col_num+1, value=col_name)
        # Data rows.
        for r_idx, (index, row) in enumerate(df.iterrows(), start=start_row+4):
            sheet.merge_cells(f"A{r_idx}:B{r_idx}")
            sheet.cell(row=r_idx, column=1, value=index)
            for c_idx, value in enumerate(row, start=2):
                sheet.cell(row=r_idx, column=c_idx+1, value=value)

    def _apply_common_format(self, sheet, landuse_start_row):
        """Apply widths, heights, fonts, number format and borders."""
        # Column widths.  get_column_letter (instead of chr(64 + col)) stays
        # correct beyond column Z.
        for col in range(1, sheet.max_column + 1):
            sheet.column_dimensions[get_column_letter(col)].width = 14
        # Row heights for everything from row 5 on, except the four header
        # rows of the land-use table.
        landuse_header_rows = range(landuse_start_row, landuse_start_row + 4)
        for row in range(5, sheet.max_row+1):
            if row not in landuse_header_rows:
                sheet.row_dimensions[row].height = 23
        thin_border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        for row in sheet.iter_rows(min_row=3, max_row=sheet.max_row, min_col=1, max_col=sheet.max_column):
            for cell in row:
                # Default: centred, wrapped, bold 14pt; overridden below.
                cell.alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
                cell.font = Font(bold=True, size=14)
                # Column A below the town header is not bold.
                if cell.column == 1 and cell.row > 3 and cell.row not in (landuse_start_row, landuse_start_row+2):
                    cell.font = Font(bold=False, size=14)
                # Unit cell of the land-use table.
                if cell.column == sheet.max_column and cell.row == landuse_start_row+1:
                    cell.font = Font(bold=False, size=14)
                # Title row of the land-use table.
                if cell.row == landuse_start_row:
                    cell.font = Font(bold=False, size=18)
                    cell.alignment = Alignment(vertical='bottom', horizontal='center')
                # Column B of the first header row of either table.
                if (cell.row == 3 and cell.column == 2) or (cell.row == landuse_start_row+2 and cell.column == 2):
                    cell.font = Font(bold=False, size=14)
                # Numbers: plain number format; values rounding to 0 show as "-".
                if isinstance(cell.value, (int, float)):
                    cell.number_format = numbers.FORMAT_NUMBER
                    if round(cell.value,0) == 0.0:
                        cell.value = "-"
                # Borders everywhere except the land-use title and unit rows.
                if cell.row >1 and cell.row not in (landuse_start_row, landuse_start_row+1):
                    cell.border = thin_border
def read_arcgis_table(table_path):
    """Load an ArcGIS table into a pandas DataFrame.

    All fields are read, column names are upper-cased, and a ``temp_area``
    column is added holding ``AREA * 0.0015`` rounded to four decimals
    (presumably square metres converted to mu - confirm against the caller's
    output units).  An 'OID@' column is dropped when present.
    """
    records = arcpy.da.TableToNumPyArray(table_path, "*")
    table = pd.DataFrame(records)
    table.columns = table.columns.str.upper()
    table["temp_area"] = (table["AREA"] * 0.0015).round(4)
    if 'OID@' in table.columns:
        table = table.drop(columns='OID@')
    return table
# 获取每个一级地类面积主要是12类
def get_area_by_group(dltb_class_feature, excel_target_path, xzqmc, is_by_xzq=False):
    """Collect the target area of every land-use class for the adjustment step.

    Args:
        dltb_class_feature: Land-use parcel feature class; its TBDLMJ field is
            summed per land-use code and multiplied by 0.0015.
        excel_target_path: Workbook with the published target areas.  "Sheet2"
            is read for regions in ``yunnan_region``, "Sheet1" otherwise.
        xzqmc: Region name, matched against the '行政单位' column.
        is_by_xzq: When true, return the whole sheet as
            ``{行政单位: {column: value}}`` with the land-use name columns
            renamed to their two-digit codes, and skip the parcel statistics.

    Returns:
        ``{land-use code: target area}``; values published in the workbook for
        ``xzqmc`` override the parcel-derived ones.  ``None`` if anything raises.
    """
    try:
        is_yunnan = xzqmc in yunnan_region
        if is_yunnan:
            target_df = pd.read_excel(excel_target_path, sheet_name="Sheet2")
            landuse_types = {'0101':'水田', '0102':'水浇地', '0103':'旱地', '02':'园地', '03':'林地', '04':'草地', '12':'其他'}
        else:
            # Regions in guangxi_region and unknown regions share this layout.
            target_df = pd.read_excel(excel_target_path, sheet_name="Sheet1")
            landuse_types = {'01':'耕地', '02':'园地', '03':'林地', '04':'草地', '12':'其他'}
        # Header cells may carry stray whitespace.
        target_df.columns = target_df.columns.str.strip()

        if is_by_xzq:
            land_type_mapping = {
                '耕地': '01',
                '园地': '02',
                '林地': '03',
                '草地': '04',
                '其他': '12'
            }
            df_encoded = target_df.rename(columns=land_type_mapping)
            return df_encoded.set_index('行政单位').to_dict('index')

        # Start empty so a missing feature class (or unusable "other" parcels)
        # still lets the published workbook values through, instead of
        # tripping over an unassigned variable.
        target_areas = {}
        if not arcpy.Exists(dltb_class_feature):
            print(f"警告:输入要素类不存在: {dltb_class_feature}")
        else:
            code_field = 'YNDLBM' if is_yunnan else 'YJDLBM'
            df = pd.DataFrame(arcpy.da.TableToNumPyArray(dltb_class_feature, [code_field, "TBDLMJ"], skip_nulls=False, null_value=np.nan))
            other_area = df.loc[df[code_field] == '12', 'TBDLMJ']
            if other_area.isnull().any() or other_area.eq(0).any():
                print("警告其他地类TBDLMJ字段 存在空值或无效的记录,将不平差其他地类")
            else:
                area_by_group = df.groupby(code_field)["TBDLMJ"].sum() * 0.0015
                target_areas = area_by_group.to_dict()

        # Published targets for this region take precedence.
        region_target = target_df[target_df['行政单位'] == xzqmc]
        if region_target.empty:
            print(f"警告:未找到{xzqmc}的目标面积数据,将使用TBDLMJ数据进行平差")
            return target_areas
        for code, name in landuse_types.items():
            if name in region_target.columns:
                published = region_target[name].values[0]
                # An empty Excel cell is NaN, which is truthy; skip it
                # explicitly along with zero.
                if pd.notna(published) and published:
                    target_areas[code] = published
        return target_areas
    except Exception as e:
        print(f"计算面积时出错: {str(e)}")
        return None
def main():
    """Run the area-statistics job and report the outcome on stdout.

    Steps: parse settings, intersect the land-use parcels with the
    reclassified polygons, tabulate the intersection per township, aggregate
    the areas, adjust them to the target areas, and write two workbooks (raw
    and adjusted).  The outcome is reported through print_result.

    NOTE: the ``finally`` clause always ends the process with exit code 0,
    including after a failure or a ``sys.exit(1)`` raised inside
    parse_arguments; callers have to rely on the RESULT line.
    """
    params = None
    temp_files_to_clean = []
    original_workspace = None
    try:
        # 1. Parse arguments.
        params = parse_arguments()
        reclassed_polygon = params["reclassed_polygon"]
        soil_property = params["soil_property"]
        xzq_polygon = params["xzq_features"]
        dltb_polygon = params["dltb_features"]
        output_path = params["batch_output_folder"]
        input_path = params["input_folder"]
        output_settings = params["output_settings"]
        xzqmc = params["xzqmc"]
        is_by_xzq = params["is_by_xzq"]

        original_workspace = arcpy.env.workspace
        arcpy.env.workspace = input_path
        arcpy.env.overwriteOutput = True

        for required_input in (reclassed_polygon, xzq_polygon, dltb_polygon):
            if not arcpy.Exists(required_input):
                raise FileNotFoundError(f"输入文件不存在: {required_input}")

        # Creating the sub-folder also creates output_path when missing.
        output_origin_path = os.path.join(output_path, "原始结果")
        os.makedirs(output_origin_path, exist_ok=True)
        output_xlsx_path = os.path.join(output_origin_path, f"{soil_property}_原始面积统计表.xlsx")
        output_adjust_xlsx_path = os.path.join(output_path, f"{soil_property}_面积统计表.xlsx")
        output_jpg_path = os.path.join(output_path, f"{soil_property}_area_stats.jpg")

        out_feature_class = fr"in_memory\out_feature_class_{uuid.uuid4().hex[:8]}"
        out_dbf_table = fr"in_memory\out_dbf_table_{uuid.uuid4().hex[:8]}"
        # Register each temp dataset as its own entry (flat list), matching
        # how the sibling scripts register theirs.
        temp_files_to_clean.extend([out_feature_class, out_dbf_table])

        # Intersect the land-use parcels with the reclassified polygons.
        print_status(f"求地类图斑和重分类栅格面的交集...")
        arcpy.analysis.Intersect(
            in_features=[dltb_polygon, reclassed_polygon],
            out_feature_class=out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )

        # The land-use code field depends on the region.
        if xzqmc in yunnan_region:
            landuse_code_field = "YNDLBM"
        elif xzqmc in guangxi_region:
            landuse_code_field = "YJDLBM"
        else:
            print_status(f"未找到{xzqmc}的区域配置")
            raise ValueError(f"未找到{xzqmc}的区域配置")

        print_status(f"开始执行交集制表...")
        arcpy.analysis.TabulateIntersection(
            in_zone_features=xzq_polygon,  # township boundaries
            zone_fields="XZQMC",
            in_class_features=out_feature_class,
            out_table=out_dbf_table,
            class_fields=f"gridcode;{landuse_code_field}",
            out_units="SQUARE_METERS"
        )

        # read_arcgis_table upper-cases the column names and adds temp_area.
        clipped_gdf = read_arcgis_table(out_dbf_table)
        try:
            stats = (
                clipped_gdf.groupby(["XZQMC", landuse_code_field, "GRIDCODE"])
                .agg({"temp_area": "sum"})
                .reset_index()
            )
        except Exception as e:
            # Keep the YJDLBM fallback, but say why it was needed.
            print_status(f"按{landuse_code_field}分组统计失败: {e},改用YJDLBM分组")
            stats = (
                clipped_gdf.groupby(["XZQMC", "YJDLBM", "GRIDCODE"])
                .agg({"temp_area": "sum"})
                .reset_index()
            )
        stats["adjusted_area"] = stats["temp_area"]

        # Column captions per grade code.
        standard_grades = output_settings["标准等级"]
        # Marker values: "1" soil texture, "-10-0.3" acidification pH,
        # "-100.1" pH change between the two surveys.
        key_caption_markers = ("1", "-10-0.3", "-100.1")
        if soil_property in ("PH", "TRRZ"):
            grade_map = get_specail_map(standard_grades)
        elif any(marker in standard_grades.values() for marker in key_caption_markers):
            grade_map = {str(i+1): str(val) for i, val in enumerate(standard_grades.keys())}
        else:
            grade_map = {str(i+1): str(val) for i, val in enumerate(standard_grades.values())}

        if xzqmc in guangxi_region:
            landuse_map = {"01": "耕地", "02": "园地", "03": "林地", "04": "草地", "12": "其他"}
        else:
            landuse_map = {"0101": "水田", "0102": "水浇地", "0103": "旱地", "02": "园地", "03": "林地", "04": "草地", "12": "其他"}

        # Adjustment against the target areas (path is relative to the
        # current working directory).
        excel_target_path = Path("tools/config_json/公布的变更调查平差面积.xlsx")
        each_dl_target = get_area_by_group(dltb_polygon, excel_target_path, xzqmc, is_by_xzq)
        if is_by_xzq:
            adjusted_stats = 平差工具.adjust_by_district_landuse(stats, each_dl_target)
        else:
            adjusted_stats = 平差工具.adjust_area_statistics(stats, each_dl_target)

        # 2. Write the raw and the adjusted workbook.
        reporter = SoilQualityReporter(output_xlsx_path, grade_map, landuse_map, output_settings)
        reporter.generate_report(stats)
        reporter_adjust = SoilQualityReporter(output_adjust_xlsx_path, grade_map, landuse_map, output_settings)
        reporter_adjust.generate_report(adjusted_stats)

        # NOTE(review): the reported path is the .jpg name, but nothing in
        # this function writes that file - confirm what the GUI expects.
        print_result(True, output_jpg_path, "")
    except Exception as e:
        error_msg = f"主函数错误: {str(e)}\n{traceback.format_exc()}"
        print_status(error_msg)
        print_result(False, error_message=error_msg)
    finally:
        temp_files_processor.clean_up_temp_files(temp_files_to_clean, workspace=original_workspace)
        sys.exit(0)
# Script entry point: announce start-up on the STATUS channel, then run the job.
if __name__ == '__main__':
    print_status("开始执行")
    main()

View File

@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
"""
输入重分类后栅格转面要素类、乡镇边界面要素类、地类图斑要素类;
按一级地类统计土壤属性面积 和 按乡镇统计土壤属性面积;
酸化情况统计表生成,第一按样点数量统计,第二按制图面积统计(分土壤类型、乡镇、土地利用类型等进行统计)
"""
import json
from pathlib import Path
import sys
import traceback
import time
import arcpy
import argparse
from tools.core.utils import 平差工具
from tools.core.utils.os_utils import temp_files_processor
sys.path.append(str(Path(__file__).parent))
from acid_stats import 空间连接, 行政区划酸化统计表, 土地利用类型酸化统计表, 土壤类型图酸化统计表
def parse_arguments():
    """Parse the command line and return the acidification settings.

    Loads the JSON file given by ``--settings_path`` and returns its
    ``acid_stat_settings`` section (an empty dict when the section is absent).
    A falsy settings path is reported on the status channel and ends the
    process via ``sys.exit(1)``.
    """
    cli = argparse.ArgumentParser(description="将ArcGIS表格转换为Excel")
    cli.add_argument("--settings_path", required=True, help="配置文件路径")
    settings_path = cli.parse_args().settings_path

    if not settings_path:
        print_status("错误: 未找到有效配置文件")
        sys.exit(1)

    with open(settings_path, 'r', encoding="utf-8") as settings_file:
        settings = json.load(settings_file)
    return settings.get("acid_stat_settings", {})
def print_status(message):
    """Write a ``STATUS:<message>`` progress line to stdout and flush it.

    The GUI reads these lines while the script runs, hence the immediate flush.
    """
    progress_line = "STATUS:{}".format(message)
    print(progress_line)
    sys.stdout.flush()
def print_result(success, output_path="", error_message=""):
    """Write the final ``RESULT:`` line to stdout and flush it.

    ``RESULT:True|<output_path>|`` on success, otherwise
    ``RESULT:False||<error_message>`` where line feeds of the message are
    turned into spaces and carriage returns are removed, so the line cannot
    be broken up.
    """
    if success:
        outcome = f"RESULT:True|{output_path}|"
    else:
        flattened = error_message.replace('\n', ' ')
        flattened = flattened.replace('\r', '')
        outcome = f"RESULT:False||{flattened}"
    print(outcome)
    sys.stdout.flush()
def log_arcpy_message(message):
    """Report an ArcPy geoprocessing message through print_status.

    Handles the types 'Message', 'Warning' and 'Error' (read from
    ``message.type``); an error is also written to stderr as
    ``ArcPyError:<text>``.  Other types produce no output.
    """
    kind = message.type
    if kind == 'Message':
        prefix = "ArcPy消息"
    elif kind == 'Warning':
        prefix = "ArcPy警告"
    elif kind == 'Error':
        prefix = "ArcPy错误"
    else:
        return

    text = message.message
    print_status(f"{prefix}: {text}")
    if kind == 'Error':
        # Errors also go to the standard error stream.
        sys.stderr.write(f"ArcPyError:{text}\n")
        sys.stderr.flush()
def main():
    """Run the acidification statistics job and report the outcome on stdout.

    Reads the settings returned by parse_arguments, keeps only the cells of
    the acidification raster that are greater than 0.3, optionally joins the
    pH sample points, and then generates three tables in turn (by
    administrative unit, by land-use type, by soil type).  The outcome is
    reported through print_result; the process always ends with
    ``sys.exit(0)`` from the ``finally`` clause.
    """
    params = None
    temp_files_to_clean = []
    original_workspace = None
    try:
        # 1. Parse arguments.
        params = parse_arguments()
        xzq_polygon = params["xzq_features"]
        dltb_polygon = params["dltb_features"]
        output_path = params["batch_output_folder"]
        workspace_path = params["workspace_path"]
        trlx_features = params["soil_type_features"] # soil type map
        assign_raster = params["assign_raster"] # third- or second-survey raster
        ph_sample_feature = params["ph_samples"] # pH sample points
        sh_ph_tif_temp = params["acid_raster"] # acidification pH raster
        ph_classed_polygon = params["acid_ph_features"] # reclassified acidification pH features
        xzqmc = params["xzqmc"]
        # Table name, resolved against the workspace set below.
        ph_sample_table = "历史样点PH信息_Table"
        original_workspace = arcpy.env.workspace
        arcpy.env.workspace = workspace_path
        arcpy.env.overwriteOutput = True
        # sh_ph_tif = f"in_memory/temp_ph_raster"
        # temp_files_to_clean.append(sh_ph_tif)
        input_ph_raster = arcpy.Raster(sh_ph_tif_temp)
        # Con is given no false value: only cells > 0.3 keep their value.
        filtered_raster = arcpy.sa.Con(input_ph_raster > 0.3, input_ph_raster)
        # filtered_raster.save(sh_ph_tif)
        sh_ph_tif = arcpy.Raster(filtered_raster)
        # NOTE(review): this registers a Raster object, not a path - confirm
        # that clean_up_temp_files accepts it.
        temp_files_to_clean.append(sh_ph_tif)
        # 1. Spatial join / value assignment for the pH sample points; skipped
        #    when the sample table already exists in the workspace.
        if not arcpy.Exists(ph_sample_table):
            print_status("样点空间连接...")
            空间连接.export_to_points(ph_sample_feature, dltb_polygon, trlx_features, xzq_polygon, assign_raster, workspace_path)
            time.sleep(4)
        excel_target_path = Path("tools/config_json/公布的变更调查平差面积.xlsx") # target-area workbook (relative to the working directory)
        target_area_dict = 平差工具.get_area_by_group(dltb_polygon, excel_target_path, xzqmc) # target area per land-use class
        # 2. Build the statistics tables.
        print_status("生成行政区划表...")
        df_with_factor = 行政区划酸化统计表.main(workspace_path, xzq_polygon, ph_classed_polygon, dltb_polygon, sh_ph_tif, output_path, target_area_dict)
        time.sleep(4)
        print_status("生成土地利用类型表...")
        土地利用类型酸化统计表.main(workspace_path, ph_classed_polygon,dltb_polygon, sh_ph_tif, output_path,target_area_dict)
        time.sleep(4)
        print_status("生成土壤类型酸化表...")
        # The soil-type table reuses the result of the administrative-unit table.
        土壤类型图酸化统计表.main(workspace_path, trlx_features, ph_classed_polygon, sh_ph_tif, output_path, df_with_factor)
        time.sleep(4)
        print_result(True, output_path, "")
    except Exception as e:
        error_msg = f"主函数错误: {str(e)}\n{traceback.format_exc()}"
        print_status(error_msg)
        print_result(False, error_message=error_msg)
    finally:
        temp_files_processor.clean_up_temp_files(temp_files_to_clean, workspace=original_workspace)
        # Always exit 0; success or failure is conveyed by the RESULT line.
        sys.exit(0)
# Script entry point: announce start-up on the STATUS channel, then run the job.
if __name__ == '__main__':
    print_status("开始执行")
    main()

View File

@@ -0,0 +1,235 @@
# -*- coding: utf-8 -*-
"""
输入重分类后栅格转面要素类、乡镇边界面要素类、地类图斑要素类;
按一级地类统计土壤属性面积 和 按乡镇统计土壤属性面积;
土壤属性统计表生成,第一按样点数量统计,第二按制图面积统计(分土壤类型、乡镇、土地利用类型等进行统计)
"""
import json
import multiprocessing
import os
from pathlib import Path
import sys
import traceback
import time
import arcpy
import argparse
from tools.core.utils import 平差工具, common_utils
sys.path.append(str(Path(__file__).parent))
from soil_prop_stats import B1土壤属性分级分布, B2土地利用类型土壤属性, B3不同土壤类型土壤属性, E1土壤属性历史变化, B3_TRZD不同土壤类型土壤属性, B1_TRZD土壤属性分级分布, B2_TRZD土地利用类型土壤属性
from soil_prop_stats import B3_TRZD12不同土壤类型土壤属性, B1_TRZD12土壤属性分级分布, B2_TRZD12土地利用类型土壤属性
from tools.config.arcgis_field_cal_code import codeblock_dltb_yjdl, codeblock_dltb_ejdl
from tools.core.utils.os_utils import temp_files_processor
def parse_arguments():
    """Parse the command line and return the soil-property settings.

    The JSON file named by ``--settings_path`` is loaded and its
    ``soil_prop_stat_settings`` section is returned (``{}`` if missing).
    With a falsy settings path an error is printed on the status channel and
    the process exits with ``sys.exit(1)``.
    """
    arg_parser = argparse.ArgumentParser(description="将ArcGIS表格转换为Excel")
    arg_parser.add_argument("--settings_path", required=True, help="配置文件路径")
    parsed = arg_parser.parse_args()

    if not parsed.settings_path:
        print_status("错误: 未找到有效配置文件")
        sys.exit(1)

    with open(parsed.settings_path, 'r', encoding="utf-8") as settings_file:
        loaded = json.load(settings_file)
    return loaded.get("soil_prop_stat_settings", {})
def print_status(message):
    """Print ``STATUS:<message>`` on stdout for the GUI and flush at once."""
    text = "STATUS:{}".format(message)
    print(text)
    sys.stdout.flush()
def print_result(success, output_path="", error_message=""):
    """Print the job outcome as one ``RESULT:`` line on stdout and flush.

    Format on success: ``RESULT:True|<output_path>|``
    Format on failure: ``RESULT:False||<error_message>`` - with '\\n' replaced
    by a space and '\\r' removed so the message stays on one line.
    """
    if not success:
        one_line = error_message.replace('\n', ' ').replace('\r', '')
        print(f"RESULT:False||{one_line}")
    else:
        print(f"RESULT:True|{output_path}|")
    sys.stdout.flush()
def log_arcpy_message(message):
    """Relay an ArcPy geoprocessing message to the status output.

    'Message', 'Warning' and 'Error' messages (per ``message.type``) are
    passed to print_status with a type-specific prefix.  Errors are also
    written to stderr in the form ``ArcPyError:<text>``.  Unknown types are
    dropped.
    """
    is_error = message.type == 'Error'
    if message.type == 'Message':
        print_status(f"ArcPy消息: {message.message}")
    elif message.type == 'Warning':
        print_status(f"ArcPy警告: {message.message}")
    elif is_error:
        print_status(f"ArcPy错误: {message.message}")
        # Mirror errors on stderr.
        sys.stderr.write(f"ArcPyError:{message.message}\n")
        sys.stderr.flush()
def process_soil_property(args):
    """Process one soil property; used as the worker function of the process pool.

    Args:
        args: Tuple of 15 positional values, unpacked on the first line of the
            body (property name, export config, data paths, target areas,
            region name).

    Returns:
        True when table 1 was generated, False when required inputs are
        missing or any exception occurred (the exception is printed, not raised).
    """
    soil_prop_name, config, data_source_path, history_samples_path, reclassed_features_path, history_reclassed_features_path, sanpu_prop_tif_path, history_raster_path, dltb_features, trlx_features, output_path, target_area_df1, target_area_df2, target_area_all, xzqmc = args
    try:
        prop_config = config['export_config'][soil_prop_name]
        # Input datasets are located by naming convention: <property>_reclassed_polygon.shp / <property>.tif.
        reclassed_features = os.path.join(reclassed_features_path, f"{soil_prop_name}_reclassed_polygon.shp")
        history_reclassed_features = os.path.join(history_reclassed_features_path, f"{soil_prop_name}_reclassed_polygon.shp")
        soil_prop_tif = os.path.join(sanpu_prop_tif_path, f"{soil_prop_name}.tif")
        history_raster = os.path.join(history_raster_path, f"{soil_prop_name}.tif")
        if not arcpy.Exists(reclassed_features) or not arcpy.Exists(soil_prop_tif):
            print(f"缺少{soil_prop_name}的栅格或重分类要素,请检查输入文件路径是否正确!")
            return False
        # Table 1: TRZD12 and TRZD have dedicated modules that take no raster argument.
        print(f"生成{soil_prop_name}的表1...")
        if soil_prop_name == "TRZD12":
            B1_TRZD12土壤属性分级分布.main(data_source_path, soil_prop_name, reclassed_features, dltb_features, output_path, target_area_all,xzqmc, prop_config)
        elif soil_prop_name == "TRZD":
            B1_TRZD土壤属性分级分布.main(data_source_path, soil_prop_name, reclassed_features, dltb_features, output_path, target_area_all,xzqmc, prop_config)
        else:
            B1土壤属性分级分布.main(data_source_path, soil_prop_name, reclassed_features, dltb_features, soil_prop_tif, output_path, target_area_all,xzqmc, prop_config)
        time.sleep(2)
        # Tables 2 and 3 are currently disabled (kept below, commented out).
        # print(f"生成{soil_prop_name}的表2...")
        # if soil_prop_name == "TRZD12":
        # B2_TRZD12土地利用类型土壤属性.main(data_source_path, soil_prop_name, dltb_features, reclassed_features, output_path, target_area_df2, prop_config)
        # elif soil_prop_name == "TRZD":
        # B2_TRZD土地利用类型土壤属性.main(data_source_path, soil_prop_name, dltb_features, reclassed_features, output_path, target_area_df2, prop_config)
        # else:
        # B2土地利用类型土壤属性.main(data_source_path, soil_prop_name, dltb_features, soil_prop_tif, output_path, target_area_df2, prop_config)
        # time.sleep(2)
        # print(f"生成{soil_prop_name}的表3...")
        # if soil_prop_name == "TRZD12":
        # B3_TRZD12不同土壤类型土壤属性.main(data_source_path,soil_prop_name,trlx_features,reclassed_features,output_path,target_area_df1,prop_config)
        # elif soil_prop_name == "TRZD":
        # B3_TRZD不同土壤类型土壤属性.main(data_source_path, soil_prop_name, trlx_features, reclassed_features, output_path, target_area_df1, prop_config)
        # else:
        # B3不同土壤类型土壤属性.main(data_source_path, soil_prop_name, trlx_features, soil_prop_tif, output_path, target_area_df1, prop_config, dltb_features)
        # time.sleep(2)
        # NOTE(review): this message is still printed although the history
        # comparison itself is commented out below.
        print(f"生成{soil_prop_name}的历史对比...")
        # if arcpy.Exists(history_reclassed_features) and arcpy.Exists(history_raster) and arcpy.Exists(history_samples_path):
        # E1土壤属性历史变化.main(xzqmc,data_source_path,history_samples_path,reclassed_features,history_reclassed_features, soil_prop_name, dltb_features, soil_prop_tif,history_raster, output_path, target_area_all, prop_config)
        # time.sleep(2)
        # else:
        # print_status(f"警告:缺少{soil_prop_name}的历史栅格或重分类要素,请检查输入文件路径是否正确!")
        return True
    except Exception as e:
        # Failures are reported as a False result; the traceback is not printed.
        print(f"处理{soil_prop_name}时出错: {str(e)}")
        return False
def main():
    """Generate the soil-property tables for every configured property.

    Reads the settings returned by parse_arguments, loads the target areas,
    makes sure the helper fields exist on the land-use and soil-type feature
    classes, and processes the properties of ``sample_list`` in a process
    pool.  Exactly one RESULT line is printed: success only when every
    property succeeded.

    The three history inputs may be supplied via the optional settings keys
    ``history_samples_folder``, ``history_raster_folder`` and
    ``history_reclassed_feature_folder``; the previous fixed paths remain the
    defaults.

    NOTE: the ``finally`` clause always ends the process with exit code 0.
    """
    params = None
    temp_files_to_clean = []
    original_workspace = None
    try:
        # 1. Parse arguments.
        params = parse_arguments()
        xzqmc = params["xzqmc"]
        config_file = params["config_file"]
        output_path = params["output_folder"]
        data_source_path = params["data_source_path"]
        sanpu_prop_tif_path = params["sanpu_prop_tif_folder"]
        reclassed_features_path = params["reclassed_feature_folder"]
        soil_prop_name_list = params["sample_list"]
        # History inputs: configurable, falling back to the former fixed paths.
        history_samples_path = params.get(
            "history_samples_folder",
            r"E:\@三普属性图出图\广西武鸣区\@基础数据\测土配方样点数据\测土配方样点.gdb")
        history_raster_path = params.get(
            "history_raster_folder",
            r"E:\@三普属性图出图\广西武鸣区\@基础数据\测土配方栅格\投影后")
        history_reclassed_features_path = params.get(
            "history_reclassed_feature_folder",
            r"E:\@三普属性图出图\广西武鸣区\过程数据\测土配方重分类\面积统计用栅格面")
        dltb_features = os.path.join(data_source_path, "地类图斑")
        trlx_features = os.path.join(data_source_path, "土壤类型图")

        original_workspace = arcpy.env.workspace
        arcpy.env.workspace = data_source_path
        arcpy.env.overwriteOutput = True

        # Target areas; the workbook path is resolved against the working directory.
        excel_target = Path("tools/config_json/公布的变更调查平差面积.xlsx")
        excel_target_path = str(excel_target.resolve())
        target_area_df2 = 平差工具.get_target_areas(excel_target_path,"Sheet2", xzqmc)  # second-level classes
        target_area_df1 = 平差工具.get_target_areas(excel_target_path,"Sheet1", xzqmc)  # first-level classes
        target_area_all = 平差工具.get_target_areas_by_group(excel_target_path)

        # 2. Build the tables.
        # Land-use helper fields; best effort - a failure is printed and the run continues.
        if arcpy.Exists(dltb_features):
            try:
                check_fields = ["YJDL", "EJDL", "YJDLBM", "YJDL_EJDL"]
                if not common_utils.check_fields_exist_describe(dltb_features, check_fields):
                    arcpy.management.CalculateField(dltb_features, "EJDL", "calculate_ejdl(!DLBM!,!DLMC!)", "PYTHON3", codeblock_dltb_ejdl)
                    arcpy.management.CalculateField(dltb_features, "YJDL", "calculate_yjdl(!DLBM!)", "PYTHON3", codeblock_dltb_yjdl)
                    arcpy.management.CalculateField(dltb_features, "YJDLBM", "!DLBM![:2]", "PYTHON3")
                    arcpy.management.CalculateField(dltb_features,"YJDL_EJDL","!YJDL! + '_' + !EJDL!","PYTHON3")
            except Exception as e:
                print(f'报什么错:{e}')
        # Soil-type helper field (YL_TS), same best-effort handling.
        if arcpy.Exists(trlx_features):
            try:
                check_fields = ["YL_TS"]
                if not common_utils.check_fields_exist_describe(trlx_features, check_fields):
                    arcpy.management.CalculateField(trlx_features, "YL_TS", "!YL! + '_' + !TS!", "PYTHON3")
            except Exception as e:
                print(f'报什么错:{e}')

        # Export configuration shared by all workers.
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # One argument tuple per soil property.
        process_args = [
            (soil_prop_name, config, data_source_path, history_samples_path, reclassed_features_path, history_reclassed_features_path,
             sanpu_prop_tif_path, history_raster_path, dltb_features, trlx_features, output_path,
             target_area_df1, target_area_df2, target_area_all, xzqmc)
            for soil_prop_name in soil_prop_name_list
        ]

        # At most 3 workers, never more than there are tasks, and at least 1
        # (a pool cannot be created with 0 processes).
        num_processes = max(1, min(3, len(soil_prop_name_list)))
        print(f"使用 {num_processes} 个进程并行处理 {len(soil_prop_name_list)} 个土壤属性...")
        with multiprocessing.Pool(processes=num_processes) as pool:
            results = pool.map(process_soil_property, process_args)

        # Report once: a failure must not be followed by a success line.
        if all(results):
            print_result(True, output_path, "")
        else:
            failed_count = results.count(False)
            error_msg = f"{failed_count} 个土壤属性处理失败"
            print_result(False, error_message=error_msg)
    except Exception as e:
        error_msg = f"主函数错误: {str(e)}\n{traceback.format_exc()}"
        print_status(error_msg)
        print_result(False, error_message=error_msg)
    finally:
        temp_files_processor.clean_up_temp_files(temp_files_to_clean, workspace=original_workspace)
        sys.exit(0)
# Script entry point.
if __name__ == '__main__':
    # freeze_support() has to be the first statement after the guard: in a
    # frozen Windows executable the worker processes re-enter here, and
    # anything placed before it would run in every worker as well.
    multiprocessing.freeze_support()
    print_status("开始执行")
    main()

249
tools/core/test_script.py Normal file
View File

@@ -0,0 +1,249 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
测试脚本
用于测试ArcGIS Pro图层属性和标注功能
"""
import os
import sys
import argparse
import traceback
import arcpy
def log(message):
    """Write ``message`` to standard output as one line."""
    line = str(message)
    print(line)
def test_arcgis_environment():
    """Log the Python interpreter, ArcPy install and geoprocessing environment."""
    log("=== ArcGIS Pro环境信息 ===")
    log(f"Python版本: {sys.version}")
    log(f"Python路径: {sys.executable}")
    log(f"当前工作目录: {os.getcwd()}")
    # Version and product name come from the same install-info mapping.
    install_info = arcpy.GetInstallInfo()
    log(f"ArcPy版本: {install_info['Version']}")
    log(f"ArcPy产品: {install_info['ProductName']}")

    # Current geoprocessing environment settings.
    environment = arcpy.env
    log("\n=== ArcGIS环境设置 ===")
    log(f"工作空间: {environment.workspace}")
    log(f"输出坐标系统: {environment.outputCoordinateSystem}")
    log(f"覆盖输出: {environment.overwriteOutput}")
def test_layer_properties(layer_path):
    """Log diagnostic information about a layer.

    Reports basic properties, layer-type flags, which of a fixed list of
    properties the layer supports, its label classes and label visibility,
    and the ``arcpy.da.Describe`` dictionary.  Every section catches and logs
    its own errors; nothing is raised or returned.
    """
    log(f"\n=== 图层属性测试: {layer_path} ===")
    if not arcpy.Exists(layer_path):
        log(f"错误: 图层不存在 - {layer_path}")
        return

    missing = 'N/A'  # printed for attributes the object does not have
    try:
        # Create the layer object.
        log("尝试创建图层对象...")
        layer = arcpy.mp.Layer(layer_path)
        log(f"成功创建图层: {layer.name}")

        # Basic properties.
        log("\n图层基本属性:")
        log(f"名称: {layer.name}")
        if hasattr(layer, 'dataSource'):
            source_type = type(layer.dataSource).__name__
        else:
            source_type = missing
        log(f"数据源类型: {source_type}")
        log(f"长名称: {getattr(layer, 'longName', missing)}")

        # Layer-type flags.
        log("\n图层类型判断:")
        type_flags = (
            ("是要素图层", 'isFeatureLayer'),
            ("是栅格图层", 'isRasterLayer'),
            ("是图形图层", 'isGroupLayer'),
        )
        for caption, attribute in type_flags:
            log(f"{caption}: {getattr(layer, attribute, missing)}")

        # Supported properties.
        log("\n支持的属性:")
        property_names = [
            'LABELCLASSES', 'SHOWLABELS', 'NAME', 'DATASOURCE', 'DEFINITIONQUERY',
            'VISIBLE', 'TRANSPARENCY', 'BRIGHTNESS', 'CONTRAST', 'SYMBOLOGY'
        ]
        for property_name in property_names:
            try:
                supported = hasattr(layer, property_name.lower()) or (
                    hasattr(layer, 'supports') and layer.supports(property_name)
                )
                verdict = '支持' if supported else '不支持'
                log(f"{property_name}: {verdict}")
            except Exception as e:
                log(f"{property_name}: 检查失败 - {str(e)}")

        # Label classes and label visibility.
        log("\n标注类测试:")
        try:
            if not hasattr(layer, 'listLabelClasses'):
                log("图层不支持listLabelClasses方法")
            else:
                label_classes = layer.listLabelClasses()
                log(f"找到 {len(label_classes)} 个标注类")
                for number, label_class in enumerate(label_classes, start=1):
                    log(f"标注类 #{number}: {getattr(label_class, 'name', missing)}")
                    log(f" 表达式: {getattr(label_class, 'expression', missing)}")
                    log(f" SQL查询: {getattr(label_class, 'SQLQuery', missing)}")
                    log(f" 可见: {getattr(label_class, 'showClassLabels', missing)}")
            if not hasattr(layer, 'showLabels'):
                log("图层没有showLabels属性")
            else:
                log(f"标注显示状态: {layer.showLabels}")
        except Exception as e:
            log(f"获取标注类时出错: {str(e)}")

        # Detailed description via arcpy.da.Describe.
        log("\narcpy.da.Describe描述信息:")
        try:
            description = arcpy.da.Describe(layer)
            for key in description:
                value = description[key]
                # Container values are not printed in full.
                if isinstance(value, (dict, list, tuple)):
                    log(f"{key}: [复杂类型]")
                else:
                    log(f"{key}: {value}")
        except Exception as e:
            log(f"获取Describe信息时出错: {str(e)}")
    except Exception as e:
        log(f"测试图层属性时出错: {str(e)}")
        log(traceback.format_exc())
def test_annotation_conversion(map_path, layer_name, output_folder):
    """Run the label-to-annotation conversion test for one layer.

    Steps, each reported through ``log``:

    1. open the project at ``map_path`` and take its first map;
    2. look up ``layer_name`` with ``listLayers`` and use the first match;
    3. dump the layer's properties via ``test_layer_properties``;
    4. switch ``showLabels`` on when the layer exposes that attribute;
    5. call ``ConvertLabelsToAnnotation``; if that raises, try
       ``LabelFeatures`` as a fallback.

    Exceptions raised from the moment the project is opened are logged
    together with their traceback instead of propagating.

    Args:
        map_path: Path of the project file (.aprx).
        layer_name: Layer name handed to ``listLayers``.
        output_folder: Output location; the folder is created when missing.
    """
    log(f"\n=== 标注转注记测试: {map_path}, 图层: {layer_name} ===")

    if not arcpy.Exists(map_path):
        log(f"错误: 地图文档不存在 - {map_path}")
        return

    # The output folder must exist before anything is written into it.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    aprx = None
    try:
        log("打开地图文档...")
        aprx = arcpy.mp.ArcGISProject(map_path)

        log("获取地图...")
        available_maps = aprx.listMaps()
        if not available_maps:
            log("错误: 地图文档中没有地图")
            return
        # The first map of the project is used by default.
        target_map = available_maps[0]
        log(f"使用地图: {target_map.name}")

        log(f"查找图层: {layer_name}")
        matching_layers = target_map.listLayers(layer_name)
        if not matching_layers:
            log(f"错误: 找不到图层 {layer_name}")
            return
        label_layer = matching_layers[0]
        log(f"找到图层: {label_layer.name}")

        test_layer_properties(label_layer)

        # Switching labels on is best-effort; a failure is only logged.
        try:
            if hasattr(label_layer, 'showLabels'):
                log(f"当前标注状态: {label_layer.showLabels}")
                label_layer.showLabels = True
                log(f"已开启标注,新状态: {label_layer.showLabels}")
        except Exception as label_err:
            log(f"设置标注状态时出错: {str(label_err)}")

        log("\n执行标注转注记...")
        try:
            output_anno = os.path.join(output_folder, f"{label_layer.name}_Anno")

            # Remove a previous result so the conversion starts clean.
            if arcpy.Exists(output_anno):
                log(f"注记已存在: {output_anno},将尝试删除")
                arcpy.management.Delete(output_anno)

            log("使用ConvertLabelsToAnnotation...")
            arcpy.cartography.ConvertLabelsToAnnotation(
                target_map,
                [label_layer],
                output_folder,
                "FEATURE_LINKED",
                "STANDARD",
                None,
                None,
                None
            )
            log("ConvertLabelsToAnnotation执行成功")
        except Exception as convert_err:
            log(f"执行标注转注记时出错: {str(convert_err)}")
            log(traceback.format_exc())

            # Fallback path, only reached when the conversion above failed.
            try:
                log("\n尝试使用LabelFeatures替代方案...")
                arcpy.cartography.LabelFeatures(
                    in_features=label_layer,
                    out_geodatabase=output_folder
                )
                log("LabelFeatures执行成功")
            except Exception as fallback_err:
                log(f"LabelFeatures也失败: {str(fallback_err)}")
    except Exception as test_err:
        log(f"测试标注转注记时出错: {str(test_err)}")
        log(traceback.format_exc())
    finally:
        # Drop the project reference on every exit path.
        del aprx
def main():
    """Command-line entry point of the layer / labelling test script.

    Parses the arguments, prints the environment report, echoes the test
    message ``--count`` times, then runs the layer-property test when
    ``--layer_path`` is given and the annotation test when ``--map``,
    ``--layer`` and ``--output`` are all given.

    Returns:
        int: always 0; used as the process exit code.
    """
    parser = argparse.ArgumentParser(description='ArcGIS Pro图层和标注功能测试')
    option_specs = (
        ('--message', {'default': '测试消息', 'help': '测试消息'}),
        ('--count', {'type': int, 'default': 1, 'help': '重复次数'}),
        ('--map', {'help': '地图文档路径(.aprx)'}),
        ('--layer', {'help': '图层名称'}),
        ('--layer_path', {'help': '图层文件路径(.lyrx)'}),
        ('--output', {'help': '输出文件夹'}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Environment / system report first.
    test_arcgis_environment()

    # Echo the test message the requested number of times (1-based counter).
    for sequence in range(1, args.count + 1):
        log(f"\n{sequence}. {args.message}")

    # Layer-file test only needs the .lyrx path.
    if args.layer_path:
        test_layer_properties(args.layer_path)

    # Annotation test needs the project, the layer name and an output folder.
    if all((args.map, args.layer, args.output)):
        test_annotation_conversion(args.map, args.layer, args.output)

    log("\n测试脚本执行完成")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

View File

@@ -0,0 +1,26 @@
import arcpy
import pandas as pd
def read_arcgis_table(table_path):
    """Load an ArcGIS table into a pandas DataFrame.

    All fields are read, the column names are upper-cased, and a
    ``temp_area`` column is added holding ``AREA * 0.0015`` rounded to four
    decimals (the original notes describe this as the area converted to mu).
    An ``OID@`` column is dropped when present.

    Args:
        table_path: Path of the ArcGIS table.

    Returns:
        pandas.DataFrame: the table contents plus the ``temp_area`` column.
    """
    frame = pd.DataFrame(arcpy.da.TableToNumPyArray(table_path, "*"))
    frame.columns = frame.columns.str.upper()
    frame["temp_area"] = (frame["AREA"] * 0.0015).round(4)

    # The object-id pseudo field is not needed downstream.
    if 'OID@' in frame.columns:
        frame = frame.drop(columns='OID@')
    return frame

View File

@@ -0,0 +1,291 @@
import arcpy
import numpy as np
def get_data_type(data_path):
    """Return the ``dataType`` reported by ``arcpy.Describe`` for a dataset.

    Args:
        data_path: Path of the dataset to inspect.

    Returns:
        The ``dataType`` value, or ``False`` when the path does not exist or
        describing it fails.
    """
    if not arcpy.Exists(data_path):
        return False
    try:
        return arcpy.Describe(data_path).dataType
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed; ordinary failures still map to False.
        return False
def get_config_key(every_string: str) -> str:
    """Return the configuration key that prefixes ``every_string``.

    The prefix is the text before the first underscore.  It is returned when
    it is one of the known keys; otherwise, or when the input cannot be
    split, an empty string is returned.
    """
    config_dict = {
        "AB": "有效硼","ACU": "有效铜","AMN": "有效锰","AMO": "有效钼","AS1": "有效硫","AZN": "有效锌","CEC": "阳离子交换量","ECA": "交换性钙",
        "EMG": "交换性镁","TSE": "全硒","TN": "全氮","TP": "全磷","TK": "全钾","AFE": "有效铁","AK": "速效钾","AP": "有效磷", "TRRZ": "土壤容重","LSFD":"砾石丰度",
        "OM": "有机质","FL": "粉粒含量","NL": "黏粒含量","SL": "砂粒含量","PH": "土壤 pH","YXTCHD": "有效土层厚度","GZCHD": "耕作层厚度","TRZD": "土壤质地","TRZD12": "土壤质地",
        "三普PH": "三普PH","二普PH": "二普PH","测土PH": "测土PH","二普-三普": "二普-三普","测土-三普": "测土-三普","二普-测土": "二普-测土"
    }
    try:
        prefix = every_string.split("_")[0]
    except Exception:
        # Non-string input (e.g. None) is treated as "no key".
        return ""
    return prefix if prefix in config_dict else ""
def parse_raster_standard(standard_str):
    """Parse a reclassification standard string into numeric bounds.

    Supported forms:

    * ``">2.00"``      -> ``(2.0, inf)``
    * ``"1.00~2.00"``  -> ``(1.0, 2.0)`` (a leading "greater than" mark on
      the lower bound is ignored)
    * ``"≤0.20"``      -> ``(0, 0.2)``
    * ``"2.5"``        -> ``(2.5, 2.5)``
    * several of the above joined by a fullwidth comma ``","`` (optionally
      followed by a newline) -> list with one entry per non-empty part

    Args:
        standard_str: The standard text.

    Returns:
        tuple | list | None: ``(low, high)``, a list of such tuples for a
        multi-range standard, or ``None`` when a plain value is not numeric.

    Raises:
        ValueError: when a comparison symbol is recognised but the remaining
            text is not a number.
    """
    # NOTE(review): in the reviewed copy the symbol literals of this function
    # were empty strings, which makes `"" in s` always true.  The variants
    # below are inferred from the docstring examples (fullwidth and ASCII
    # forms are accepted alike) -- confirm against the standards table.
    greater_marks = (">", "﹥", ">")
    range_marks = ("~", "∼", "〜", "~")
    at_most_marks = ("≤", "⩽", "≦", "<=")

    def _without(text, marks):
        # Remove every occurrence of the given marks from ``text``.
        for mark in marks:
            text = text.replace(mark, "")
        return text

    if "," in standard_str:
        # Split on the comma alone: the previous ",\n" separator left a
        # comma-without-newline input unsplit and recursed forever.
        pieces = (piece.strip() for piece in standard_str.split(","))
        return [parse_raster_standard(piece) for piece in pieces if piece]

    text = standard_str.strip()

    range_mark = next((mark for mark in range_marks if mark in text), None)
    if range_mark is not None:
        parts = text.split(range_mark)
        return (float(_without(parts[0], greater_marks)), float(parts[1]))

    if any(mark in text for mark in greater_marks):
        return (float(_without(text, greater_marks)), float('inf'))

    if any(mark in text for mark in at_most_marks):
        return (0, float(_without(text, at_most_marks)))

    # Plain value: a degenerate range, or None when it is not numeric.
    try:
        value = float(text)
    except ValueError:
        return None
    return (value, value)
def create_remap_table(standards_dict):
    """Build a reclassification remap table from a standards mapping.

    Each value of ``standards_dict`` is parsed with
    ``parse_raster_standard``.  The class at 0-based position ``i`` gets the
    new value ``i + 1``.  When a standard consists of several sub-ranges,
    sub-range ``m`` (0-based) gets ``m * len(standards_dict) + i + 1``, so
    the second sub-ranges are numbered after all first ones.

    Args:
        standards_dict: Mapping of class name -> standard string.

    Returns:
        list: rows of ``[old_min, old_max, new_value]``; empty when the input
        is empty or not a dict.  Standards that parse to nothing are skipped.
    """
    if not standards_dict or not isinstance(standards_dict, dict):
        print("警告: 没有有效的标准数据")
        return []

    class_count = len(standards_dict)
    remap_table = []
    for position, standard in enumerate(standards_dict.values()):
        parsed = parse_raster_standard(standard)
        if not parsed:
            continue
        if isinstance(parsed, list):
            # The value is computed directly instead of being looked up in a
            # pre-built list of 2 * class_count entries, which raised
            # IndexError as soon as a standard had more than two sub-ranges.
            for segment, bounds in enumerate(parsed):
                if not bounds:
                    continue
                remap_table.append([bounds[0], bounds[1], segment * class_count + position + 1])
        else:
            remap_table.append([parsed[0], parsed[1], position + 1])
    return remap_table
def check_fields_exist_describe(feature_class, field_names):
    """Check via ``arcpy.Describe`` that every named field exists.

    Args:
        feature_class: Dataset to describe.
        field_names: Iterable of field names that must all be present.

    Returns:
        ``True`` when all names are present, ``False`` when at least one is
        missing, ``None`` when the check itself raised (the error is printed).
    """
    try:
        present = [field.name for field in arcpy.Describe(feature_class).fields]
        return all(name in present for name in field_names)
    except Exception as e:
        print(f"检查字段时出错: {e}")
        return None
def get_grade_by_standard(value, grade_standards):
    """Return the name of the first grade whose standard contains ``value``.

    Grades are tested in the mapping's own order.

    Args:
        value: Number to classify; ``None`` means "no data".
        grade_standards: Mapping of grade name -> standard string.

    Returns:
        str: the matching grade name, ``"无数据"`` for ``None`` and
        ``"超出范围"`` when no grade matches.
    """
    if value is None:
        return "无数据"

    # The mapping already iterates in definition order; the previous
    # sorted(..., key=list(keys).index) was a quadratic no-op.
    for grade_name, grade_standard in grade_standards.items():
        if is_value_in_grade(value, grade_standard):
            return grade_name
    return "超出范围"
# Maps typographic comparison symbols onto the ASCII forms understood by
# is_value_in_single_range.
# NOTE(review): the symbol literals were empty in the reviewed copy of this
# function (replace('', ...) inserts the replacement between every
# character); this set is inferred from the replacement targets
# '>', '<=' and '~' -- confirm against the standards table.
_GRADE_SYMBOL_TABLE = str.maketrans({
    ">": ">",
    "﹥": ">",
    "≤": "<=",
    "⩽": "<=",
    "≦": "<=",
    "~": "~",
    "∼": "~",
    "〜": "~",
    ",": ",",
})


def is_value_in_grade(value, grade_standard):
    """Tell whether ``value`` falls inside a grade standard.

    The standard may list several comma-separated ranges (used for pH, per
    the original notes); the value matches when any one range contains it.

    Args:
        value: Number to test.
        grade_standard: Standard string, e.g. ``">1.00~2.00"``.

    Returns:
        bool: ``True`` when at least one range contains ``value``.
    """
    normalized = grade_standard.translate(_GRADE_SYMBOL_TABLE)
    return any(
        is_value_in_single_range(value, range_str.strip())
        for range_str in normalized.split(',')
    )
def is_value_in_single_range(value, range_str):
    """Tell whether ``value`` lies inside one normalised range string.

    Recognised forms (``a``/``b`` are numbers, optionally signed):

    * ``>a~b``, ``a~b``, ``>a<=b``  ->  ``a < value <= b``
    * ``>a``                         ->  ``value > a``
    * ``<=a``                        ->  ``value <= a``
    * anything else                  ->  ``str(value) == range_str``

    Args:
        value: Number to test.
        range_str: Range text using the ASCII symbols ``>``, ``<=``, ``~``.

    Returns:
        bool: whether the value is inside the range.

    Raises:
        IndexError: when a symbol is present but the text holds fewer
            numbers than that form needs.
    """
    import re

    # The sign applies to integers as well as decimals; the earlier pattern
    # ([-+]?\d*\.\d+|\d+) silently read "<=-1" as 1.0.
    bounds = [float(token) for token in re.findall(r'[-+]?(?:\d*\.\d+|\d+)', range_str)]

    has_greater = '>' in range_str
    has_at_most = '<=' in range_str
    has_span = '~' in range_str

    if has_greater and (has_span or has_at_most):
        # ">a<=b" is treated like ">a~b", matching create_single_condition.
        return bounds[0] < value <= bounds[1]
    if has_greater:
        return value > bounds[0]
    if has_at_most:
        return value <= bounds[0]
    if has_span:
        return bounds[0] < value <= bounds[1]
    # Not a recognised range: fall back to a literal text comparison.
    return str(value) == range_str
def vectorized_grade_assignment(values, grade_standards):
    """Assign grade codes to many values at once with ``np.select``.

    Grades are numbered from 1 in the mapping's order.  A standard may hold
    two comma-separated segments: the first segment of grade ``i`` is coded
    ``i`` and its second segment ``i + len(grade_standards)``.  All first
    segments are tested before any second segment; further segments are
    ignored.

    Args:
        values: Array-like of values; string / object data is converted to
            float first.
        grade_standards: Mapping of grade name -> standard string.

    Returns:
        The ``np.select`` result; entries matching no condition hold the
        default ``"超出范围"``.
    """
    # Make sure comparisons run on numbers, not on strings.
    if isinstance(values, np.ndarray) and values.dtype.kind in 'OUS':
        values = values.astype(float)
    elif hasattr(values, 'dtype') and values.dtype == object:
        values = values.astype(float)

    grade_count = len(grade_standards)
    first_segments = []
    second_segments = []
    # Plain iteration keeps definition order; the former
    # sorted(..., key=list(keys).index) was a quadratic no-op.
    for rank, ranges in enumerate(grade_standards.values(), 1):
        segments = [segment.strip() for segment in ranges.split(',')]
        first_segments.append((rank, segments[0]))
        if len(segments) >= 2:
            second_segments.append((rank + grade_count, segments[1]))

    ordered = first_segments + second_segments
    conditions = [create_condition(values, standard) for _, standard in ordered]
    choices = [code for code, _ in ordered]
    return np.select(conditions, choices, default="超出范围")
# Normalises typographic comparison symbols to ASCII and drops newlines and
# spaces before a standard is turned into a numpy condition.
# NOTE(review): the symbol literals were empty in the reviewed copy of this
# function; this set is inferred from the replacement targets '>', '<=' and
# '~' -- confirm against the standards table.
_CONDITION_SYMBOL_TABLE = str.maketrans({
    ">": ">",
    "﹥": ">",
    "≤": "<=",
    "⩽": "<=",
    "≦": "<=",
    "~": "~",
    "∼": "~",
    "〜": "~",
    ",": ",",
    "\n": None,
    " ": None,
})


def create_condition(values, grade_standard):
    """Build the boolean numpy condition for one grade standard.

    The standard is normalised, then split on commas; the conditions of all
    non-empty segments are OR-ed together.

    Args:
        values: Numeric array-like the condition is evaluated on.
        grade_standard: Standard string, possibly with several ranges.

    Returns:
        Boolean array-like marking the values inside the standard.

    Raises:
        ValueError: when the standard holds no usable segment, or a segment
            cannot be parsed by ``create_single_condition``.
    """
    cleaned = grade_standard.translate(_CONDITION_SYMBOL_TABLE)
    segments = [segment for segment in cleaned.split(',') if segment]
    if not segments:
        # Previously a string of separators returned None, which only failed
        # later inside np.select with an unrelated message.
        raise ValueError(f"无法解析的范围字符串: '{grade_standard}'")

    condition = create_single_condition(values, segments[0])
    for segment in segments[1:]:
        condition = condition | create_single_condition(values, segment)
    return condition
def create_single_condition(values, range_str):
    """Build the boolean numpy condition for one normalised range string.

    Recognised forms (``a``/``b`` are numbers, optionally signed):

    * ``>a<=b``, ``>a~b``, ``a~b``  ->  ``(values > a) & (values <= b)``
    * ``>a``                         ->  ``values > a``
    * ``<=a``                        ->  ``values <= a``
    * a bare number                  ->  ``values == number``

    Args:
        values: Numeric array-like.
        range_str: Range text using the ASCII symbols ``>``, ``<=``, ``~``.

    Returns:
        Boolean array-like with one entry per value.

    Raises:
        ValueError: when the text contains no number, or is neither a
            recognised range nor a bare number.
        IndexError: when a two-sided form holds only one number.
    """
    import re

    # The sign applies to integers as well as decimals; the earlier pattern
    # ([-+]?\d*\.\d+|\d+) silently read "<=-1" as 1.0.
    bounds = [float(token) for token in re.findall(r'[-+]?(?:\d*\.\d+|\d+)', range_str)]
    if not bounds:
        raise ValueError(f"无法从字符串 '{range_str}' 中提取数字")

    has_greater = '>' in range_str
    has_at_most = '<=' in range_str
    has_span = '~' in range_str

    if has_greater and (has_at_most or has_span):
        return (values > bounds[0]) & (values <= bounds[1])
    if has_greater:
        return values > bounds[0]
    if has_at_most:
        return values <= bounds[0]
    if has_span:
        return (values > bounds[0]) & (values <= bounds[1])

    # No range symbol at all: the text must be a single number.
    try:
        return values == float(range_str)
    except ValueError as err:
        raise ValueError(f"无法解析的范围字符串: '{range_str}'") from err

View File

@@ -0,0 +1,59 @@
# utils/excel_utils.py
import re
from openpyxl.styles import Font, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.worksheet import Worksheet
class ExcelStyleUtils:
    """Static helpers for styling openpyxl worksheets."""

    @staticmethod
    def set_style(
            ws: Worksheet,
            cell_range: str,
            font: Font = Font(name='宋体', size=11),
            align: Alignment = Alignment(horizontal='center', vertical='center', wrap_text=True),
            border: Border = Border(left=Side(style='thin'), right=Side(style='thin'),
                                    top=Side(style='thin'), bottom=Side(style='thin'))):
        """Apply font, alignment and border to every cell of ``cell_range``.

        Nothing happens when ``cell_range`` is empty.  The defaults are a
        size-11 宋体 font, centred wrapped text and a thin border on all
        four sides.
        """
        if not cell_range:
            return
        for cells in ws[cell_range]:
            for cell in cells:
                cell.font = font
                cell.alignment = align
                cell.border = border

    @staticmethod
    def auto_adjust_column_width(ws: Worksheet):
        """Size each column to its longest non-empty cell.

        A cell's length is its character count plus 0.7 for every character
        in the range U+4E00..U+9FA5.  Cells inside a merge that spans columns
        only are ignored.  The final width is the longest length plus 5.
        """
        widths = {}
        for cells in ws.rows:
            for cell in cells:
                if not cell.value:
                    continue
                # First merged range containing this cell, if any.
                enclosing = next(
                    (merged for merged in ws.merged_cells.ranges if cell.coordinate in merged),
                    None,
                )
                if get_merge_type(enclosing) == 'column':
                    continue
                text = str(cell.value)
                needed = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
                widths[cell.column] = max(widths.get(cell.column, 0), needed)

        # Apply the collected widths.
        for column_index, width in widths.items():
            ws.column_dimensions[get_column_letter(int(column_index))].width = width + 5
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged cell range by the directions it spans.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col``; may be ``None``.

    Returns:
        ``'both'`` when the range spans several rows and several columns,
        ``'row'`` when it spans several rows only, ``'column'`` when it spans
        several columns only, ``None`` for a falsy argument or a single cell.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    # One row and one column: not really a merge.
    return None

View File

@@ -0,0 +1,147 @@
from typing import List, Union
import numpy as np
# 解决百分比相加不为100%
def fix_percentages(values: List[float], total: float) -> List[float]:
    """Convert values to two-decimal percentages of ``total`` that sum to 100.

    Each share is truncated to two decimals; the hundredths still missing
    from 100 are then handed out, one each, to the shares with the largest
    truncated remainders.

    The redistribution only runs when the shortfall is between 1 and
    ``len(values)`` hundredths, i.e. when the values really add up to
    ``total``.  Otherwise the truncated shares are returned as they are.

    Args:
        values: The parts.
        total: The whole the parts are measured against.

    Returns:
        list[float]: one percentage per value; all zeros when ``total`` is 0.

    Examples:
        >>> [float(p) for p in fix_percentages([0.2, 0.3, 0.5], 1)]
        [20.0, 30.0, 50.0]
        >>> [float(p) for p in fix_percentages([1, 1, 1], 3)]
        [33.34, 33.33, 33.33]
    """
    if not total:
        # A share of nothing is reported as 0 instead of dividing by zero.
        return [0.0 for _ in values]

    exact = [v / total * 100 for v in values]
    floored = [np.floor(p * 100) / 100 for p in exact]  # truncate to 2 decimals

    # Hundredths of a percent still missing from 100.
    shortfall = int(round(10000 - sum(floored) * 100))
    if not 0 < shortfall <= len(floored):
        # Nothing to hand out, or the inputs do not add up to ``total``
        # (this case used to index past the end of the list).
        return [round(p, 2) for p in floored]

    by_remainder = sorted(
        range(len(exact)), key=lambda i: exact[i] - floored[i], reverse=True
    )
    fixed = list(floored)
    for idx in by_remainder[:shortfall]:
        fixed[idx] += 0.01
    return [round(p, 2) for p in fixed]
# === 误差矫正 ===
def correct_rounding_error(target_total: Union[int, float], adjusted_areas: List[float], original_areas: List[float]) -> List[int]:
    """Round areas to integers whose sum matches ``target_total``.

    The adjusted areas are rounded with ``round``; the difference between
    the target and the rounded sum is then removed in steps of 1, walking
    the elements in priority order and starting over when the end of the
    list is reached, so the correction is spread across elements instead of
    being piled onto a single one:

    * sum too small: +1, largest fractional part of ``original_areas``
      first (elements that are already negative are left alone);
    * sum too large: -1, smallest fractional part first (elements at 0 or
      below are left alone, so no area turns negative).

    At most ``len(adjusted_areas) * 10`` single steps are applied; a warning
    is printed when some error remains.

    Args:
        target_total: Desired sum; converted with ``int``.
        adjusted_areas: Scaled float areas to be rounded.
        original_areas: Areas before scaling; only their fractional parts
            are used, to order the corrections.

    Returns:
        list[int]: corrected integer areas.  On any internal error the error
        is printed and the plain rounded values are returned.

    Examples:
        >>> correct_rounding_error(10, [3.2, 2.8, 4.1], [3.2, 2.8, 4.1])
        [3, 3, 4]
        >>> correct_rounding_error(10, [3.1, 2.3, 4.2], [3.1, 2.3, 4.2])
        [3, 3, 4]
        >>> correct_rounding_error(10, [2.4, 2.3, 2.2], [2.4, 2.3, 2.2])
        [4, 3, 3]
        >>> correct_rounding_error(8, [3.9, 2.8, 1.7], [3.9, 2.8, 1.7])
        [4, 3, 1]
    """
    try:
        target_total = int(target_total)
        rounded_areas = [int(round(area)) for area in adjusted_areas]
        remaining_error = target_total - sum(rounded_areas)
        if remaining_error == 0 or not rounded_areas:
            return rounded_areas

        step = 1 if remaining_error > 0 else -1

        # The priority depends only on the unchanged original areas, so it is
        # computed once.  Stable sort: ties keep their input order.
        fractions = [float(area - int(area)) for area in original_areas]
        priority = sorted(
            range(len(rounded_areas)), key=lambda i: fractions[i], reverse=step > 0
        )

        budget = len(rounded_areas) * 10  # upper bound on single-unit steps
        progressed = True
        while remaining_error != 0 and budget > 0 and progressed:
            progressed = False
            for idx in priority:
                if remaining_error == 0 or budget == 0:
                    break
                can_raise = step == 1 and rounded_areas[idx] >= 0
                can_lower = step == -1 and rounded_areas[idx] > 0
                if can_raise or can_lower:
                    rounded_areas[idx] += step
                    remaining_error -= step
                    budget -= 1
                    progressed = True

        if remaining_error != 0:
            print(f"警告:无法完全矫正误差,剩余: {remaining_error}")
        return rounded_areas
    except Exception as e:
        print(f"误差矫正出错: {e}")
        # Fall back to the plain rounded values.
        return [int(round(area)) for area in adjusted_areas]
if __name__ == '__main__':
    # Manual smoke run.  The adjusted values round to 3 + 4 + 4 = 11, one
    # above the target of 10, so a single -1 correction is needed.
    target = 10
    adjusted = [3.3, 3.9, 4.2]
    original = [3.25, 2.85, 4.15]
    print(correct_rounding_error(target, adjusted, original))

View File

@@ -0,0 +1,47 @@
import gc
import sys
import arcpy
# 临时文件清理
def clean_up_temp_files(temp_files, workspace=None):
    """Delete temporary datasets and empty the ``in_memory`` workspace.

    Best effort throughout: every failure is written to stderr with a
    ``CleanupError:`` prefix and never raised.  A garbage collection is
    forced at the end.

    Args:
        temp_files: Iterable of dataset paths to delete; may be empty/None.
        workspace: Workspace to switch back to afterwards.  When omitted,
            ``arcpy.env.workspace`` is not restored by this function.
    """
    try:
        for temp_file in temp_files or ():
            if not arcpy.Exists(temp_file):
                continue
            try:
                arcpy.management.Delete(temp_file)
            except Exception as delete_err:
                sys.stderr.write(f"CleanupError:无法删除临时文件 {temp_file}: {str(delete_err)}\n")

        # Work inside in_memory so that only its own items are removed, not
        # same-named items elsewhere.
        try:
            if arcpy.Exists("in_memory"):
                arcpy.env.workspace = "in_memory"
                in_memory_items = (
                    arcpy.ListDatasets() + arcpy.ListFeatureClasses() + arcpy.ListRasters()
                )
                for item in in_memory_items:
                    try:
                        arcpy.management.Delete(item)
                    except Exception as delete_mem_item_err:
                        sys.stderr.write(f"CleanupError:无法清理内存项 in_memory/{item}: {str(delete_mem_item_err)}\n")
        except Exception as delete_in_memory_err:
            sys.stderr.write(f"CleanupError:清理 in_memory 工作空间时发生错误: {str(delete_in_memory_err)}\n")

        # Switch back to the caller's workspace when one was supplied.
        if workspace and arcpy.Exists(workspace):
            try:
                arcpy.env.workspace = workspace
                arcpy.management.ClearWorkspaceCache()
            except Exception as restore_ws_err:
                sys.stderr.write(f"CleanupError:无法恢复原始工作空间 {workspace}: {str(restore_ws_err)}\n")
    except Exception as cleanup_err:
        # Catch-all for anything not handled above.
        sys.stderr.write(f"CleanupError:清理临时文件过程中发生未预料的错误: {str(cleanup_err)}\n")

    gc.collect()

View File

@@ -0,0 +1,201 @@
# 获取每个一级地类面积主要是12类
import arcpy
import numpy as np
import pandas as pd
from .math_utils import correct_rounding_error
# 获取目标面积
def get_area_by_group(dltb_class_feature, excel_target_path, xzqmc, is_by_xzq=False):
    """Collect the target area of every first-level land-use class.

    With ``is_by_xzq`` the Excel sheet alone is used: its land-use columns
    are renamed to class codes and a ``{district: {code: area}}`` dict is
    returned.

    Otherwise the targets start from the feature class: ``TBDLMJ`` summed
    per ``YJDLBM`` and multiplied by 0.0015.  These are dropped when class
    '12' has null or zero areas, or when the feature class is missing.  Any
    non-zero, non-NaN value in the Excel row for ``xzqmc`` then overrides
    the corresponding class.

    Args:
        dltb_class_feature: Feature class holding ``YJDLBM`` / ``TBDLMJ``.
        excel_target_path: Excel file with a ``行政单位`` column and one
            column per land-use name.
        xzqmc: District name to look up in the Excel file.
        is_by_xzq: Return the per-district dict instead.

    Returns:
        dict: ``{class code: area}``, or the per-district dict; ``{}`` when
        any step raises (the error is printed).
    """
    try:
        target_df = pd.read_excel(excel_target_path)
        # Tolerate stray whitespace in the header row.
        target_df.columns = target_df.columns.str.strip()

        if is_by_xzq:
            # Land-use name -> class code.
            land_type_mapping = {
                '耕地': '01',
                '园地': '02',
                '林地': '03',
                '草地': '04',
                '其他地类': '12'
            }
            df_encoded = target_df.rename(columns=land_type_mapping)
            return df_encoded.set_index('行政单位').to_dict('index')

        # Start empty so a missing feature class no longer leaves the name
        # unbound (that raised later and discarded the Excel targets too).
        target_areas = {}
        if not arcpy.Exists(dltb_class_feature):
            print(f"警告:输入要素类不存在: {dltb_class_feature}")
        else:
            df = pd.DataFrame(arcpy.da.TableToNumPyArray(dltb_class_feature, ["YJDLBM", "TBDLMJ"], skip_nulls=False, null_value=np.nan))
            qtdl_df = df[df['YJDLBM'] == '12']
            if qtdl_df['TBDLMJ'].isnull().any() or qtdl_df['TBDLMJ'].eq(0).any():
                print("警告其他地类TBDLMJ字段 存在空值或无效的记录,将不平差其他地类")
            else:
                target_areas = (df.groupby("YJDLBM")["TBDLMJ"].sum() * 0.0015).to_dict()

        # Excel targets for the requested district override the sums above.
        district_rows = target_df[target_df['行政单位'] == xzqmc]
        if district_rows.empty:
            print(f"警告:未找到{xzqmc}的目标面积数据,将使用TBDLMJ数据进行平差")
            return target_areas

        landuse_types = {'01':'耕地', '02':'园地', '03':'林地', '04':'草地', '12':'其他地类'}
        for dlbm, dlmc in landuse_types.items():
            if dlmc in district_rows.columns:
                value = district_rows[dlmc].values[0]
                if value and not np.isnan(value):
                    target_areas[dlbm] = value
        return target_areas
    except Exception as e:
        print(f"计算面积时出错: {str(e)}")
        return {}
# 按地类平差(全区统一平差)
def adjust_area_statistics(stats_df, target_areas):
    """Scale the areas of each land-use class to a target total.

    Every class in ``target_areas`` that has a positive total in
    ``stats_df`` gets its ``temp_area`` values multiplied by
    ``target / total``; the results are then passed through
    ``correct_rounding_error``.  Classes with no positive total in the data
    are reported and skipped.

    Args:
        stats_df: DataFrame with a ``temp_area`` column and a class-code
            column (``YJDLBM``, falling back to ``YNDLBM``).
        target_areas: Mapping of class code -> target area, or ``None``.

    Returns:
        pandas.DataFrame: a copy with ``adjusted_area`` and
        ``adjustment_factor`` columns.  The unchanged ``stats_df`` is
        returned when ``target_areas`` is ``None`` or processing fails.
    """
    try:
        if target_areas is None:
            print("警告:目标面积数据为空,不进行平差")
            return stats_df

        adjusted_df = stats_df.copy()
        dlbm = "YJDLBM" if "YJDLBM" in adjusted_df.columns else "YNDLBM"
        adjusted_df['adjusted_area'] = adjusted_df['temp_area']
        adjusted_df['adjustment_factor'] = 1.0

        # Original total area per class.
        original_totals = stats_df.groupby(dlbm)['temp_area'].sum().to_dict()

        for yjdl, target_area in target_areas.items():
            original_total = original_totals.get(yjdl, 0)
            if not original_total > 0:
                # The former `... or target_area > 0` let absent or zero-area
                # classes through; the resulting KeyError / ZeroDivisionError
                # threw away the adjustment of every class.
                print(f"警告:地类 {yjdl} 无原始面积数据,跳过平差")
                continue

            adjustment_factor = target_area / original_total
            mask = adjusted_df[dlbm] == yjdl
            adjusted_df.loc[mask, 'adjusted_area'] = adjusted_df.loc[mask, 'temp_area'] * adjustment_factor
            adjusted_df.loc[mask, 'adjustment_factor'] = adjustment_factor

            # Rounding correction on the scaled values.
            adjusted_areas = adjusted_df.loc[mask, 'adjusted_area'].tolist()
            original_areas = stats_df.loc[mask, 'temp_area'].tolist()
            corrected_areas = correct_rounding_error(target_area, adjusted_areas, original_areas)
            adjusted_df.loc[mask, 'adjusted_area'] = corrected_areas
            print(f"地类 {yjdl}: 平差系数 = {adjustment_factor:.6f}, 目标面积 = {target_area}, 矫正后总面积 = {sum(corrected_areas)}")
        return adjusted_df
    except Exception as e:
        print(f"平差处理失败: {e}")
        return stats_df
# 按行政区+地类进行平差
def adjust_by_district_landuse(stats_df:pd.DataFrame, target_areas_dict:dict):
    """Scale areas to target totals per district and land-use class.

    No adjustment is made, and ``stats_df`` itself is returned, when some
    district present in the data has no entry in ``target_areas_dict``.
    Otherwise each (district, class) group with a positive original total is
    multiplied by ``target / total`` and passed through
    ``correct_rounding_error``.

    Args:
        stats_df: DataFrame with ``XZQMC``, ``YJDLBM`` and ``temp_area``.
        target_areas_dict: ``{district: {class code: target area}}``.

    Returns:
        pandas.DataFrame: a copy with ``adjusted_area`` and
        ``adjustment_factor`` columns, or the unchanged ``stats_df``.
    """
    adjusted_df = stats_df.copy()
    adjusted_df['adjusted_area'] = adjusted_df['temp_area']
    adjusted_df['adjustment_factor'] = 1.0

    # Every district found in the data must have targets.
    missing_districts = [
        district for district in adjusted_df['XZQMC'].unique()
        if district not in target_areas_dict
    ]
    if missing_districts:
        print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
        return stats_df

    # Original total per (district, class).
    original_totals = stats_df.groupby(['XZQMC', 'YJDLBM'])['temp_area'].sum()

    for xzqmc, landuse_targets in target_areas_dict.items():
        for yjdl, target_area in landuse_targets.items():
            group_key = (xzqmc, yjdl)
            # Skip groups absent from the data or without a positive total.
            if group_key not in original_totals.index or not original_totals.at[group_key] > 0:
                continue

            adjustment_factor = target_area / original_totals[group_key]
            mask = (adjusted_df['XZQMC'] == xzqmc) & (adjusted_df['YJDLBM'] == yjdl)
            adjusted_df.loc[mask, 'adjusted_area'] = adjusted_df.loc[mask, 'temp_area'] * adjustment_factor
            adjusted_df.loc[mask, 'adjustment_factor'] = adjustment_factor

            # Rounding correction on the scaled values.
            adjusted_areas = adjusted_df.loc[mask, 'adjusted_area'].tolist()
            original_areas = stats_df.loc[mask, 'temp_area'].tolist()
            corrected_areas = correct_rounding_error(target_area, adjusted_areas, original_areas)
            adjusted_df.loc[mask, 'adjusted_area'] = corrected_areas
            print(f"{xzqmc} - 地类 {yjdl}: 平差系数 = {adjustment_factor:.6f}, 目标面积 = {target_area}, 矫正后总面积 = {sum(corrected_areas)}")
    return adjusted_df
def get_target_areas(excel_path:str, sheet_name:str, xzqmc:str) -> pd.DataFrame:
    """Return one district's target areas in long form.

    Reads ``sheet_name`` from the workbook, takes the first row whose
    ``行政单位`` equals ``xzqmc`` and turns its remaining columns into a
    two-column frame: ``EJDL`` (the former column name) and ``面积`` (the
    value).

    Raises:
        IndexError: when no row matches ``xzqmc``.
    """
    sheet = pd.read_excel(excel_path, sheet_name)
    district_rows = sheet[sheet['行政单位'] == xzqmc]
    first_row = district_rows.set_index('行政单位').iloc[0]
    long_form = first_row.reset_index(name='面积')
    return long_form.rename(columns={'index': 'EJDL'})
def get_target_areas_by_group(excel_target_path):
    """Read ``Sheet1`` of the targets workbook as a per-district dict.

    Column names are stripped of surrounding whitespace first.

    Returns:
        dict: ``{行政单位 value: {column name: cell value}}``.
    """
    targets = pd.read_excel(excel_target_path,"Sheet1")
    # Tolerate stray whitespace in the header row.
    targets.columns = targets.columns.str.strip()
    return targets.set_index('行政单位').to_dict('index')