初始化
This commit is contained in:
392
tools/core/soil_prop_stats/B1_TRZD12土壤属性分级分布.py
Normal file
392
tools/core/soil_prop_stats/B1_TRZD12土壤属性分级分布.py
Normal file
@@ -0,0 +1,392 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.core.utils import arcgis_utils, common_utils
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
# Ordered category list for the five coarse soil-texture classes
# (used as the categorical order when sorting the final table).
trzd5_order = [
    "砂质",
    "砂壤质",
    "壤质",
    "黏壤质",
    "黏质",
]

# The twelve detailed soil-texture class names.
# NOTE(review): not referenced by the code visible in this file - confirm it is still needed.
trzd12_order = [
    "砂土及壤质砂土",
    "砂质壤土",
    "壤土",
    "粉砂质壤土",
    "砂质黏壤土",
    "黏壤土",
    "粉砂质黏壤土",
    "砂质黏土",
    "壤质黏土",
    "粉砂质黏土",
    "黏土",
    "重黏土",
]
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """Classify how a merged cell range extends.

    Args:
        merged_range: object exposing ``min_row``, ``max_row``, ``min_col`` and
            ``max_col``, or a falsy value when the cell is not merged.

    Returns:
        ``'both'`` when the range spans several rows and several columns,
        ``'row'`` when it only spans several rows, ``'column'`` when it only
        spans several columns, otherwise ``None``.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    # A one-cell range is not really a merge.
    return None
|
||||
|
||||
# 计算属性等级
|
||||
def get_prop_level(prop_level):
    """Map a detailed soil-texture code or name to its coarse texture class.

    Args:
        prop_level: a grade code (int, integer-valued float or its string
            form) or one of the twelve detailed texture names.

    Returns:
        One of "砂质", "砂壤质", "壤质", "黏壤质", "黏质", or "-" when the value
        is missing, zero or not recognised.
    """
    if pd.isna(prop_level):
        return "-"

    # Integer-valued floats (e.g. 8.0 coming from a float column) would print
    # as "8.0" and never match a code, so fold them back to an int first.
    if isinstance(prop_level, float) and prop_level.is_integer():
        prop_level = int(prop_level)

    code = str(prop_level)
    if code == "0":
        return "-"

    if code == "8" or prop_level == '砂土及壤质砂土':
        return "砂质"
    if code == "11" or prop_level == '砂质壤土':
        return "砂壤质"
    if code in {"6", "3"} or prop_level in ('粉砂质壤土', '壤土'):
        return "壤质"
    # '粉砂质年壤土' was a misspelling of '粉砂质黏壤土'; the misspelt form is
    # still accepted so existing inputs keep working.
    if code in {"1", "4", "9"} or prop_level in ('粉砂质黏壤土', '粉砂质年壤土', '黏壤土', '砂质黏壤土'):
        return "黏壤质"
    if code in {"2", "5", "7", "10", "12"} or prop_level in ('粉砂质黏土', '黏土', '壤质黏土', '砂质黏土', '重黏土'):
        return "黏质"
    return "-"
|
||||
|
||||
# 等级计算
|
||||
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Return a copy of ``df`` with grading columns added.

    When ``level_config`` is truthy and ``target_prop`` is a column of ``df``,
    the copy gains a ``GRIDCODE`` column computed by
    ``common_utils.vectorized_grade_assignment`` from the ``target_prop``
    values and ``level_config["标准等级"]``, plus a ``YJDL`` column holding the
    first two characters of ``TDLYLX``. Otherwise the plain copy is returned.
    """
    graded = df.copy()

    if not level_config or target_prop not in df.columns:
        return graded

    # Grade every value of the target property in one vectorised call.
    graded["GRIDCODE"] = common_utils.vectorized_grade_assignment(
        df[target_prop].values, level_config["标准等级"]
    )
    # First-level land-use class = leading two characters of the land-use code.
    graded['YJDL'] = graded['TDLYLX'].str[:2]
    return graded
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-grade table of sample counts and adjusted mapped areas.

    Args:
        gdb_path: geodatabase path; the sample feature class is read from
            ``gdb_path/soil_prop_feature_name``.
        soil_prop_feature_name: name of the sample feature class and of the
            property field inside it.
        df_origin_area: DataFrame with the columns ``YJDL_EJDL``, ``XZQMC``,
            ``GRIDCODE`` and ``temp_area``. A ``YJDL`` column is added to it
            in place.
        target_areas_dict: mapping ``district -> {land-use class -> target area}``
            used to rescale the mapped areas.
        xzqmc: district name used to pick the targets when ``is_by_xzq`` is false.
        is_by_xzq: when true, rescale per district and land-use class; otherwise
            rescale per land-use class using the targets of ``xzqmc``.
        prop_config: grading configuration passed to ``process_soil_dataframe``.

    Returns:
        DataFrame keyed by ``属性分级`` and ``GRIDCODE`` with the columns
        ``样点数``, ``样点数占比``, ``制图面积``, ``制图面积_平差后`` and ``面积占比``,
        sorted by texture class then grade code.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # Normalise to stripped strings, then treat the usual "null" spellings
        # as missing and drop rows that miss any of ``columns``.
        # NOTE(review): a numeric NaN becomes the string 'nan', which is not in
        # the replacement list below - confirm whether it should be dropped too.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: grade every sample and count samples per grade ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])

    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(get_prop_level)

    # Kept from the original: raises early if the property is not numeric.
    processed_df[field_name] = processed_df[field_name].astype('float')

    df_sample_means = processed_df.groupby(['属性分级','GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # == b. Mapped areas: total per district / land-use class / grade ==
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()

    # Rescale the mapped areas so each land-use total matches its target.
    # Best effort: a failure is reported and the areas adjusted so far are kept.
    try:
        if is_by_xzq:
            df_map_data['adjusted_area'] = df_map_data['temp_area']
            df_map_data['adjustment_factor'] = 1.0

            # Districts present in the data but without any target.
            missing_districts = [
                district for district in df_map_data['XZQMC'].unique()
                if district not in target_areas_dict
            ]
            if missing_districts:
                print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")

            original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()

            # A distinct loop name avoids shadowing the ``xzqmc`` parameter.
            for district, landuse_targets in target_areas_dict.items():
                for yjdl, target_area in landuse_targets.items():
                    key = (district, yjdl)
                    if key in original_totals.index and original_totals[key] > 0:
                        adjustment_factor = target_area / original_totals[key]
                        mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                        df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                        df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
        else:
            original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
            landuse_targets = target_areas_dict.get(xzqmc) or {}
            if not landuse_targets:
                print(f"警告:平差数据中不存在行政区: {[xzqmc]},未进行平差")

            for yjdl, target_area in landuse_targets.items():
                original_total = original_totals.get(yjdl, 0)
                # Only rescale classes that actually have mapped area; a missing
                # or zero total previously raised and aborted every later class.
                if original_total > 0:
                    adjustment_factor = target_area / original_total
                    mask = df_map_data['YJDL'] == yjdl
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                    df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
    except Exception as e:
        print(f"平差处理失败: {e}")

    df_map_data['面积_亩'] = df_map_data['temp_area']

    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级','GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    # The areas were already rescaled above, so the adjusted column is a copy.
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # == c. Merge sample and area statistics on every (class, grade) seen ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级','GRIDCODE']],
        df_map_areas[['属性分级','GRIDCODE']]
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级','GRIDCODE'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级','GRIDCODE'], how='left')

    # Order rows by the fixed texture-class order; values outside
    # ``trzd5_order`` (such as "-") become NaN in the categorical.
    df_final["属性分级"] = pd.Categorical(df_final['属性分级'], categories=trzd5_order, ordered=True)
    df_final.sort_values(['属性分级','GRIDCODE'], inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
|
||||
# --- 3. Excel 制表 总表---
|
||||
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """Write the grade-distribution table to a formatted Excel workbook.

    Args:
        df: table with the columns ``属性分级``, ``GRIDCODE``, ``样点数``,
            ``样点数占比``, ``制图面积_平差后`` and ``面积占比``.
        output_path: path of the ``.xlsx`` file to write.
        prop_config: configuration whose ``'标准等级'`` mapping is inverted to
            look up the display name of each grade code.

    An empty ``df`` produces a workbook that only contains a notice.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel,将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    # NOTE(review): this title mentions acidification while the table is about
    # soil texture - looks like a leftover, confirm before changing.
    ws.title = "行政区酸化程度等级分布及占比"

    # --- a. Styles ---
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Two-row header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤三普分类'
    ws.merge_cells('C1:D1'); ws['C1'] = '样点统计'
    ws.merge_cells('E1:F1'); ws['E1'] = '制图统计'

    ws['A2'] = '类别'; ws['B2'] = '名称'
    ws['C2'] = '数量/个'; ws['D2'] = '占比%'
    ws['E2'] = '面积/亩'; ws['F2'] = '占比%'

    # Invert the grade configuration: configured value -> display name.
    level_dict = prop_config['标准等级']
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. Data rows, one block of rows per texture class ---
    current_row = 3

    for yl, group_yl_df in df.groupby('属性分级', sort=False, observed=False):
        yl_start_row = current_row

        for _, row_data in group_yl_df.iterrows():
            # NOTE(review): the lookup key is str(GRIDCODE); this only matches
            # if the configured values are strings such as "8" - confirm.
            ws.cell(row=current_row, column=2).value = upper_ranges.get(str(row_data['GRIDCODE']), '-')

            sample_val = row_data.get('样点数', 0)
            sample_pct_val = row_data.get('样点数占比', 0)
            area_val = row_data.get('制图面积_平差后', 0)
            area_pct_val = row_data.get('面积占比', 0)

            # NaN (no match in the merge) compares false against 0 and shows "-".
            ws.cell(row=current_row, column=3).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=4).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=5).value = f"{area_val:.0f}" if area_val > 0 else "-"
            ws.cell(row=current_row, column=6).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"

            current_row += 1

        # Label the class only when it produced rows. With observed=False an
        # unobserved category yields an empty group; merging then would ask
        # for end_row < start_row and put the label on the next group's row.
        if current_row > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
            ws.cell(row=yl_start_row, column=1).value = yl

    # Totals row.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = df['样点数'].sum()
    ws.cell(row=current_row, column=4).value = '100'
    ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=6).value = '100'

    # --- d. Styling and column widths ---
    apply_style(f'A1:F{current_row}', cell_font, center_align, thin_border)
    apply_style('A1:F2', header_font)

    print("正在自动调整列宽...")

    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            # Horizontally merged cells span columns, so they must not drive
            # the width of their first column.
            if get_merge_type(merged_range) == 'column':
                continue
            text = str(cell.value)
            # CJK characters are wider: count each one as 1.7 characters.
            cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)

    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Run the grade-distribution report for one soil property.

    Intersects the land-use polygons with the reclassified polygons, tabulates
    the result against the "行政区划" layer by ``XZQMC``, builds the statistics
    table and writes ``<soil_prop_name>土壤分级分布.xlsx`` into ``output_path``.
    Errors are printed with a traceback rather than propagated; the temporary
    datasets are handed to ``clean_up_temp_files`` in all cases.
    """
    import gc
    import traceback

    temp_out_feature_class = r"in_memory/temp_out_feature_class"
    temp_out_tables_area = r"in_memory/temp_out_tables_area"
    # Both intermediate datasets are registered up front so the ``finally``
    # block always has a list to clean. The intersect output was previously
    # never registered and stayed in the in_memory workspace.
    # NOTE(review): assumes clean_up_temp_files can delete a feature class as
    # well as a table - confirm.
    temp_files = [temp_out_feature_class, temp_out_tables_area]

    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")
        # These districts are adjusted per sub-district rather than as a whole.
        is_by_xzq = xzqmc in ["北海市","来宾市","楚雄自治州"]

        # Intersect the land-use polygons with the reclassified polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersect result against the administrative boundaries.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)

        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
360
tools/core/soil_prop_stats/B1_TRZD土壤属性分级分布.py
Normal file
360
tools/core/soil_prop_stats/B1_TRZD土壤属性分级分布.py
Normal file
@@ -0,0 +1,360 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.core.utils import arcgis_utils, common_utils
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """Tell in which direction(s) a merged cell range extends.

    Args:
        merged_range: object with ``min_row``/``max_row``/``min_col``/``max_col``
            attributes, or a falsy value for an unmerged cell.

    Returns:
        ``'both'``, ``'row'`` (several rows), ``'column'`` (several columns),
        or ``None`` when the range covers a single cell or is falsy.
    """
    if not merged_range:
        return None

    multi_row = merged_range.max_row > merged_range.min_row
    multi_col = merged_range.max_col > merged_range.min_col

    # (spans rows, spans columns) -> label; a 1x1 range is not a merge.
    labels = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
        (False, False): None,
    }
    return labels[(bool(multi_row), bool(multi_col))]
|
||||
|
||||
# 计算属性等级
|
||||
def get_prop_level(prop_level):
    """Map a soil-texture grade code or class name to the class name.

    Args:
        prop_level: grade code (anything ``int()`` accepts) or one of the five
            texture class names.

    Returns:
        "砂质", "砂壤质", "壤质", "黏壤质" or "黏质"; "-" when the value is
        missing, zero or not recognised.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    # Convert defensively: calling int() on a class name used to raise before
    # the name comparison could run, so names were never accepted.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None

    if code == 5 or prop_level == "砂质":
        return "砂质"
    if code == 4 or prop_level == "砂壤质":
        return "砂壤质"
    if code == 3 or prop_level == "壤质":
        return "壤质"
    if code == 1 or prop_level == "黏壤质":
        return "黏壤质"
    if code == 2 or prop_level == "黏质":
        return "黏质"
    return "-"
|
||||
|
||||
# 等级计算
|
||||
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Copy ``df`` and, when grading is possible, add grade columns.

    Grading happens only if ``level_config`` is truthy and ``target_prop`` is
    a column of ``df``: ``GRIDCODE`` is filled by
    ``common_utils.vectorized_grade_assignment`` using
    ``level_config["标准等级"]``, and ``YJDL`` receives the first two
    characters of ``TDLYLX``. The input frame itself is left untouched.
    """
    result_df = df.copy()
    can_grade = bool(level_config) and target_prop in df.columns

    if can_grade:
        standards = level_config["标准等级"]
        values = df[target_prop].values
        # One vectorised call grades the whole column.
        result_df["GRIDCODE"] = common_utils.vectorized_grade_assignment(values, standards)
        # Leading two characters of the land-use code.
        result_df['YJDL'] = result_df['TDLYLX'].str[:2]

    return result_df
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-class table of sample counts and adjusted mapped areas.

    Args:
        gdb_path: geodatabase path; samples are read from
            ``gdb_path/soil_prop_feature_name``.
        soil_prop_feature_name: name of the sample feature class and of the
            field whose values are used directly as the texture class.
        df_origin_area: DataFrame with the columns ``YJDL_EJDL``, ``XZQMC``,
            ``GRIDCODE`` and ``temp_area``. A ``YJDL`` column is added in place.
        target_areas_dict: mapping ``district -> {land-use class -> target area}``.
        xzqmc: district whose targets are used when ``is_by_xzq`` is false.
        is_by_xzq: rescale per district and land-use class when true, per
            land-use class only otherwise.
        prop_config: accepted for signature compatibility; not used here.

    Returns:
        DataFrame keyed by ``属性分级`` with the columns ``样点数``,
        ``样点数占比``, ``制图面积``, ``制图面积_平差后`` and ``面积占比``.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        # Strip the values as strings, turn the usual "null" spellings into
        # NaN, then drop rows missing any of ``columns``.
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count samples per texture class ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])

    processed_df = df_samples.copy()
    # The sampled field already holds the class, so it is used as-is.
    processed_df['属性分级'] = processed_df[field_name]

    df_sample_means = processed_df.groupby(['属性分级']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # == b. Mapped areas: total per district / land-use class / grade ==
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]
    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()

    # Rescale the mapped areas towards the targets (best effort: a failure is
    # reported and whatever was adjusted so far is kept).
    try:
        if is_by_xzq:
            df_map_data['adjusted_area'] = df_map_data['temp_area']
            df_map_data['adjustment_factor'] = 1.0

            # Districts found in the data that have no target entry.
            missing_districts = [
                district for district in df_map_data['XZQMC'].unique()
                if district not in target_areas_dict
            ]
            if missing_districts:
                print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")

            original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum()

            # Loop name chosen so the ``xzqmc`` parameter is not shadowed.
            for district, landuse_targets in target_areas_dict.items():
                for yjdl, target_area in landuse_targets.items():
                    key = (district, yjdl)
                    if key in original_totals.index and original_totals[key] > 0:
                        adjustment_factor = target_area / original_totals[key]
                        mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                        df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                        df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
        else:
            original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
            landuse_targets = target_areas_dict.get(xzqmc) or {}
            if not landuse_targets:
                print(f"警告:平差数据中不存在行政区: {[xzqmc]},未进行平差")

            for yjdl, target_area in landuse_targets.items():
                original_total = original_totals.get(yjdl, 0)
                # Skip classes without mapped area: the old ``... or
                # target_area > 0`` guard let them through, and the resulting
                # KeyError / division by zero aborted all remaining classes.
                if original_total > 0:
                    adjustment_factor = target_area / original_total
                    mask = df_map_data['YJDL'] == yjdl
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
                    df_map_data.loc[mask, 'adjustment_factor'] = adjustment_factor
    except Exception as e:
        print(f"平差处理失败: {e}")

    df_map_data['面积_亩'] = df_map_data['temp_area']

    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(get_prop_level)
    df_map_areas = df_map_data.groupby(['属性分级'])['面积_亩'].sum().reset_index(name='制图面积')
    # Areas were rescaled above, so the adjusted column is a plain copy.
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # == c. Merge sample and area statistics on every class seen ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[['属性分级']],
        df_map_areas[['属性分级']]
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=['属性分级'], how='left')
    df_final = pd.merge(df_final, df_map_areas, on=['属性分级'], how='left')
    df_final.sort_values(['属性分级'], inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
|
||||
# --- 3. Excel 制表 总表---
|
||||
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config):
    """Write the texture-class distribution table to a formatted workbook.

    Args:
        df: table with the columns ``属性分级``, ``样点数``, ``样点数占比``,
            ``制图面积_平差后`` and ``面积占比``.
        output_path: path of the ``.xlsx`` file to write.
        prop_config: configuration; its ``'标准等级'`` mapping is read.

    An empty ``df`` yields a workbook that only contains a notice.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel,将创建一个空的报告。")
        empty_wb = Workbook()
        empty_ws = empty_wb.create_sheet("Mysheet", 0)
        empty_ws['A1'] = "没有有效的统计数据。"
        empty_wb.save(output_path)
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "土壤质地分类分布"

    # --- a. Styles ---
    header_font = Font(name='宋体', size=11)
    cell_font = Font(name='宋体', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin'),
    )

    def apply_style(cell_range, font, alignment=None, border=None):
        for cells in ws[cell_range]:
            for target in cells:
                target.font = font
                if alignment:
                    target.alignment = alignment
                if border:
                    target.border = border

    # --- b. Two-row header ---
    ws.merge_cells('A1:A2')
    ws['A1'] = '土壤质地类别'
    ws.merge_cells('B1:C1')
    ws['B1'] = '样点统计'
    ws.merge_cells('D1:E1')
    ws['D1'] = '制图统计'

    ws['B2'] = '数量/个'
    ws['C2'] = '占比%'
    ws['D2'] = '面积/亩'
    ws['E2'] = '占比%'

    # Not used further down; kept so a configuration without '标准等级'
    # still fails here exactly as before.
    level_dict = prop_config['标准等级']
    upper_ranges = {value: key for key, value in level_dict.items()}

    # --- c. One data row per texture class ---
    current_row = 3
    df_to_write = df.copy()

    for _, record in df_to_write.iterrows():
        row_name = record.get('属性分级', "")
        sample_val = record.get('样点数', 0)
        sample_pct_val = record.get('样点数占比', 0)
        area_val = record.get('制图面积_平差后', 0)
        area_pct_val = record.get('面积占比', 0)

        ws.cell(row=current_row, column=1).value = f"{row_name}" if row_name else "-"

        # NaN compares false against 0, so unmatched statistics show "-".
        if sample_val > 0:
            ws.cell(row=current_row, column=2).value = f"{sample_val:.0f}"
            ws.cell(row=current_row, column=3).value = f"{sample_pct_val:.1f}"
        else:
            ws.cell(row=current_row, column=2).value = "-"
            ws.cell(row=current_row, column=3).value = "-"

        if area_val > 0:
            ws.cell(row=current_row, column=4).value = f"{area_val:.0f}"
            ws.cell(row=current_row, column=5).value = f"{area_pct_val:.1f}"
        else:
            ws.cell(row=current_row, column=4).value = "-"
            ws.cell(row=current_row, column=5).value = "-"

        current_row += 1

    # Totals row.
    totals = [
        '全区',
        df['样点数'].sum(),
        '100',
        f"{df['制图面积_平差后'].sum():.0f}",
        '100',
    ]
    for col_idx, total in enumerate(totals, start=1):
        ws.cell(row=current_row, column=col_idx).value = total

    # --- d. Styling and column widths ---
    if current_row > 1:
        apply_style(f'A1:E{current_row}', cell_font, center_align, thin_border)
        apply_style('A1:E2', header_font)

    print("正在自动调整列宽...")

    dims = {}
    for cells in ws.rows:
        for cell in cells:
            if not cell.value:
                continue
            owner = None
            for candidate in ws.merged_cells.ranges:
                if cell.coordinate in candidate:
                    owner = candidate
                    break
            # Horizontally merged cells must not drive a single column's width.
            if get_merge_type(owner) == 'column':
                continue
            text = str(cell.value)
            # Each CJK character counts as 1.7 characters of width.
            cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)

    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, output_path, target_area_dict,xzqmc, prop_config):
    """Run the texture-class distribution report for one soil property.

    Intersects the land-use polygons with the reclassified polygons, tabulates
    the result against the "行政区划" layer by ``XZQMC``, builds the statistics
    table and writes ``<soil_prop_name>土壤分级分布.xlsx`` into ``output_path``.
    Errors are printed with a traceback instead of being raised; temporary
    datasets are passed to ``clean_up_temp_files`` whatever happens.
    """
    import traceback

    temp_out_feature_class = r"in_memory/temp_out_feature_class"
    temp_out_tables_area = r"in_memory/temp_out_tables_area"
    # Register both intermediates before the try block so cleanup always has
    # them. The intersect output used to be left in the in_memory workspace.
    # NOTE(review): assumes clean_up_temp_files can delete a feature class as
    # well as a table - confirm.
    temp_files = [temp_out_feature_class, temp_out_tables_area]

    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")
        # These districts are adjusted per sub-district rather than as a whole.
        is_by_xzq = xzqmc in ["北海市","来宾市","楚雄自治州"]

        # Intersect the land-use polygons with the reclassified polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersect result against the administrative boundaries.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        final_dataframe = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)

        write_to_excel_table1(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
513
tools/core/soil_prop_stats/B1土壤属性分级分布.py
Normal file
513
tools/core/soil_prop_stats/B1土壤属性分级分布.py
Normal file
@@ -0,0 +1,513 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font
|
||||
|
||||
from tools.core.utils import arcgis_utils, common_utils
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
from tools.core.utils.excel_utils import ExcelStyleUtils
|
||||
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
# District names for which the "xn" TRRZ grading function is selected.
xn_region = [
    '天峨县',
    '寻甸县',
    '罗平县',
    '丘北县',
    '永仁县',
    '南华县',
    '双柏县',
    '武定县',
    '祥云县',
    '楚雄彝族自治州',
]

# District names for which the "hn" TRRZ grading function is selected.
hn_region = [
    '北海市',
    '海城区',
    '银海区',
    '铁山港区',
    '港南区',
    '容县',
    '平南县',
    '兴宁区',
    '武鸣区',
    '邕宁区',
    '苍梧县',
    '靖西市',
    '西畴县',
    '马关县',
    '澜沧县',
    '双江县',
    '永德县',
]
|
||||
# 计算属性等级
|
||||
def get_prop_level(prop_level):
    """Map a grade code or grade name to its Roman-numeral level label.

    Codes 1-5 and 6-10 both map onto levels Ⅰ-Ⅴ (1 and 6 -> Ⅰ, ... 5 and 10 -> Ⅴ);
    the names '等级一' ... '等级五' map the same way.

    Args:
        prop_level: grade code (anything ``int()`` accepts) or grade name.

    Returns:
        "Ⅰ级" ... "Ⅴ级", or "-" for missing, zero or unrecognised input.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    # int() on a grade name used to raise before the name comparison was
    # reached; convert defensively so names are handled as well.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None

    if code in (1, 6) or prop_level == '等级一':
        return "Ⅰ级"
    if code in (2, 7) or prop_level == '等级二':
        return "Ⅱ级"
    if code in (3, 8) or prop_level == '等级三':
        return "Ⅲ级"
    if code in (4, 9) or prop_level == '等级四':
        return "Ⅳ级"
    if code in (5, 10) or prop_level == '等级五':
        return "Ⅴ级"
    return "-"
|
||||
def get_prop_level_for_pH(prop_level):
    """Map a pH grade code or name to its Roman-numeral level label.

    Grade 5 is level Ⅰ and the levels get worse symmetrically on both sides:
    4/6 -> Ⅱ, 3/7 -> Ⅲ, 2/8 -> Ⅳ, 1/9 -> Ⅴ. Grade names ('等级一' ... '等级九')
    follow the same mapping.

    Args:
        prop_level: grade code (anything ``int()`` accepts) or grade name.

    Returns:
        "Ⅰ级" ... "Ⅴ级", or "-" for missing, zero or unrecognised input.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    levels_by_code = {
        5: "Ⅰ级",
        4: "Ⅱ级", 6: "Ⅱ级",
        3: "Ⅲ级", 7: "Ⅲ级",
        2: "Ⅳ级", 8: "Ⅳ级",
        1: "Ⅴ级", 9: "Ⅴ级",
    }
    levels_by_name = {
        "等级五": "Ⅰ级",
        "等级四": "Ⅱ级", "等级六": "Ⅱ级",
        "等级三": "Ⅲ级", "等级七": "Ⅲ级",
        "等级二": "Ⅳ级", "等级八": "Ⅳ级",
        "等级一": "Ⅴ级", "等级九": "Ⅴ级",
    }

    # int() on a grade name used to raise, which made the name branches
    # unreachable; fall back to the name table when the value is not numeric.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None

    if code in levels_by_code:
        return levels_by_code[code]
    if isinstance(prop_level, str):
        return levels_by_name.get(prop_level, "-")
    return "-"
|
||||
|
||||
def get_prop_level_for_hn_TRRZ(prop_level):
    """Map a TRRZ grade code or name to its level label ("hn" districts).

    Mapping: 3 -> Ⅰ, 4 -> Ⅱ, 2/5 -> Ⅲ, 6 -> Ⅳ, 1/7 -> Ⅴ; the grade names
    '等级一' ... '等级七' follow the same mapping.

    Args:
        prop_level: grade code (anything ``int()`` accepts) or grade name.

    Returns:
        "Ⅰ级" ... "Ⅴ级", or "-" for missing, zero or unrecognised input.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    # Names cannot go through int(); the unguarded conversion used to raise
    # ValueError for them instead of reaching the name comparison.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None

    if code == 3 or prop_level == "等级三":
        return "Ⅰ级"
    if code == 4 or prop_level == "等级四":
        return "Ⅱ级"
    if code in (2, 5) or prop_level in ("等级二", "等级五"):
        return "Ⅲ级"
    if code == 6 or prop_level == "等级六":
        return "Ⅳ级"
    if code in (1, 7) or prop_level in ("等级一", "等级七"):
        return "Ⅴ级"
    return "-"
|
||||
|
||||
def get_prop_level_for_xn_TRRZ(prop_level):
    """Map a TRRZ grade code or name to its level label ("xn" districts).

    Mapping: 4 -> Ⅰ, 3/5 -> Ⅱ, 6 -> Ⅲ, 2/7 -> Ⅳ, 1/8 -> Ⅴ; the grade names
    '等级一' ... '等级八' follow the same mapping.

    Args:
        prop_level: grade code (anything ``int()`` accepts) or grade name.

    Returns:
        "Ⅰ级" ... "Ⅴ级", or "-" for missing, zero or unrecognised input.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    levels_by_code = {
        4: "Ⅰ级",
        3: "Ⅱ级", 5: "Ⅱ级",
        6: "Ⅲ级",
        2: "Ⅳ级", 7: "Ⅳ级",
        1: "Ⅴ级", 8: "Ⅴ级",
    }
    levels_by_name = {
        "等级四": "Ⅰ级",
        "等级三": "Ⅱ级", "等级五": "Ⅱ级",
        "等级六": "Ⅲ级",
        "等级二": "Ⅳ级", "等级七": "Ⅳ级",
        "等级一": "Ⅴ级", "等级八": "Ⅴ级",
    }

    # A grade name made the unguarded int() raise ValueError, so the name
    # comparisons could never succeed; try the code first, then the name.
    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        code = None

    if code in levels_by_code:
        return levels_by_code[code]
    if isinstance(prop_level, str):
        return levels_by_name.get(prop_level, "-")
    return "-"
|
||||
|
||||
|
||||
# 等级计算
|
||||
def process_soil_dataframe(df:pd.DataFrame, level_config, target_prop):
    """Produce a graded copy of the soil sample table.

    The input is copied first. If ``level_config`` is truthy and
    ``target_prop`` names a column of ``df``, the copy receives a ``GRIDCODE``
    column (from ``common_utils.vectorized_grade_assignment`` applied to the
    ``target_prop`` values with ``level_config["标准等级"]``) and a ``YJDL``
    column (first two characters of ``TDLYLX``). The copy is returned.
    """
    output = df.copy()

    gradable = level_config and target_prop in df.columns
    if not gradable:
        return output

    grade_column = "GRIDCODE"
    grade_standards = level_config["标准等级"]
    # Vectorised grading of the whole property column.
    output[grade_column] = common_utils.vectorized_grade_assignment(
        df[target_prop].values,
        grade_standards,
    )
    # First-level land-use class: leading two characters of the code.
    output['YJDL'] = output['TDLYLX'].str[:2]

    return output
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table1(gdb_path, soil_prop_feature_name, df_origin_area, target_areas_dict,xzqmc,is_by_xzq, prop_config=None):
    """Build the per-grade sample-count and mapped-area table for one soil property.

    Args:
        gdb_path: Geodatabase path; samples are read from the feature class
            ``gdb_path/soil_prop_feature_name``.
        soil_prop_feature_name: Name of the sample feature class, which is
            also the name of its value field.
        df_origin_area: Mapped-area table with the columns ``YJDL_EJDL``,
            ``XZQMC``, ``GRIDCODE`` and ``temp_area``. A ``YJDL`` column is
            added to this frame in place.
        target_areas_dict: ``{district: {land class: target area}}`` used to
            rescale the mapped areas.
        xzqmc: Region name; selects the target-area entry (when not scaling
            per district) and the TRRZ grading table.
        is_by_xzq: When true, areas are rescaled separately for every
            district/land-class pair of *target_areas_dict*; otherwise only
            per land class, using ``target_areas_dict[xzqmc]``.
        prop_config: Grading configuration forwarded to
            ``process_soil_dataframe``.

    Returns:
        ``(df_final, stat_sample)``: one row per (属性分级, GRIDCODE) with the
        columns 样点数, 样点数占比, 制图面积, 制图面积_平差后 and 面积占比, plus a
        dict holding min/max/mean/median of the sample values.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        """Strip *columns*, turn null placeholders into NaN and drop those rows."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        # 'nan' is what astype(str) yields for a real NaN; without it such
        # rows would survive the dropna below.
        df.replace(['<Null>', 'None', '', '<空>', 'nan'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # Choose the grading function once, from the *requested* region. The
    # per-district loop further down used to rebind ``xzqmc``, so the second
    # lookup silently used the last district name instead.
    if soil_prop_feature_name == 'PH':
        level_func = get_prop_level_for_pH
    elif soil_prop_feature_name == 'TRRZ' and xzqmc in hn_region:
        level_func = get_prop_level_for_hn_TRRZ
    elif soil_prop_feature_name == 'TRRZ' and xzqmc in xn_region:
        level_func = get_prop_level_for_xn_TRRZ
    else:
        level_func = get_prop_level

    # --- a. Sample points: grade, overall statistics, count per grade ---
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])

    processed_df = process_soil_dataframe(df_samples, prop_config, field_name)
    processed_df['GRIDCODE'] = processed_df['GRIDCODE'].astype('int')
    processed_df['属性分级'] = processed_df['GRIDCODE'].apply(level_func)

    processed_df[field_name] = processed_df[field_name].astype('float')
    sample_values = processed_df[field_name]
    stat_sample = {
        'min': sample_values.min(),
        'max': sample_values.max(),
        'mean': sample_values.mean(),
        'median': sample_values.median(),
    }

    df_sample_means = processed_df.groupby(['属性分级','GRIDCODE']).size().reset_index(name='样点数')
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / df_sample_means['样点数'].sum() * 100
    print("样点数计算完成。")

    # --- b. Mapped areas per grade ---
    df_origin_area['YJDL'] = df_origin_area['YJDL_EJDL'].str.split('_').str[0]

    # For these properties only cultivated land and orchards are counted.
    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']
    if soil_prop_feature_name in filtered_props:
        farmland_yjdl = ['耕地', '园地']
        df_origin_area = df_origin_area[df_origin_area['YJDL'].isin(farmland_yjdl)]
        print(f"过滤制图数据:仅统计耕地和园地(YJDL in {farmland_yjdl})")
    # GZCHD is reported for cultivated land only.
    if soil_prop_feature_name in ['GZCHD']:
        df_origin_area = df_origin_area[df_origin_area['YJDL'] == '耕地']
        print("过滤制图数据:GZCHD仅统计耕地")

    df_map_data = df_origin_area.groupby(["XZQMC","YJDL", "GRIDCODE"]).agg({"temp_area": "sum"}).reset_index()

    # Rescale the mapped areas so that every land class sums to its target
    # area. Best effort: a failure is reported and the areas rescaled so far
    # are kept.
    try:
        if is_by_xzq:
            missing_districts = [
                district for district in df_map_data['XZQMC'].unique()
                if district not in target_areas_dict
            ]
            if missing_districts:
                print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")

            original_totals = df_map_data.groupby(['XZQMC', 'YJDL'])['temp_area'].sum().to_dict()
            for district, landuse_targets in target_areas_dict.items():
                for yjdl, target_area in landuse_targets.items():
                    original_total = original_totals.get((district, yjdl), 0)
                    if original_total > 0:
                        adjustment_factor = target_area / original_total
                        mask = (df_map_data['XZQMC'] == district) & (df_map_data['YJDL'] == yjdl)
                        df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
        else:
            original_totals = df_map_data.groupby('YJDL')['temp_area'].sum().to_dict()
            # A region without targets simply gets no adjustment.
            region_targets = target_areas_dict.get(xzqmc) or {}
            for yjdl, target_area in region_targets.items():
                original_total = original_totals.get(yjdl, 0)
                # Only classes that actually have mapped area can be scaled;
                # anything else would be a KeyError or a division by zero.
                if original_total > 0:
                    adjustment_factor = target_area / original_total
                    mask = df_map_data['YJDL'] == yjdl
                    df_map_data.loc[mask, 'temp_area'] = df_map_data.loc[mask, 'temp_area'] * adjustment_factor
    except Exception as e:
        print(f"平差处理失败: {e}")

    df_map_data['面积_亩'] = df_map_data['temp_area']
    df_map_data['属性分级'] = df_map_data['GRIDCODE'].apply(level_func)

    df_map_areas = df_map_data.groupby(['属性分级','GRIDCODE'])['面积_亩'].sum().reset_index(name='制图面积')
    # The areas are already adjusted at this point; the column is kept under
    # the name the Excel writer reads.
    df_map_areas['制图面积_平差后'] = df_map_areas['制图面积']
    df_map_areas['面积占比'] = df_map_areas['制图面积_平差后'] / df_map_areas['制图面积_平差后'].sum() * 100

    # --- c. Merge sample and map statistics on the union of their keys ---
    print("--> 步骤3: 合并数据...")
    key_cols = ['属性分级','GRIDCODE']
    df_skeleton = pd.concat([
        df_sample_means[key_cols],
        df_map_areas[key_cols]
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=key_cols, how='left')
    df_final = pd.merge(df_final, df_map_areas, on=key_cols, how='left')
    df_final.sort_values(['属性分级'], inplace=True)

    print("数据处理流程完成!")
    return df_final, stat_sample
|
||||
|
||||
# --- 3. Excel 制表 总表---
|
||||
def write_to_excel_table1(df:pd.DataFrame, output_path, prop_config, soil_prop_tif, stat_sample):
    """Write the per-grade distribution table to a formatted Excel workbook.

    Args:
        df: Output of ``process_data_for_table1``; read columns are 属性分级,
            GRIDCODE, 样点数, 样点数占比, 制图面积_平差后 and 面积占比.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration. "标准等级" (required) maps each
            grade to its value range(s), comma separated; "项目分级" and
            "分级标准" (optional) select the number format and the unit.
        soil_prop_tif: Unused; kept so existing callers keep working.
        stat_sample: Dict with "mean", "median", "min" and "max" of the
            sample values, written below the table.

    When *df* is empty a workbook containing only a notice is written.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel,将创建一个空的报告。")
        wb = Workbook()
        ws = wb.create_sheet("Mysheet", 0)
        ws['A1'] = "没有有效的统计数据。"
        wb.save(output_path)
        return

    # The whole-region statistics come from the sample points.
    stats = stat_sample

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "行政区酸化程度等级分布及占比"

    # Number format of the summary rows depends on the property name, taken
    # from the first line of "项目分级".
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if not prop_name_str:
        precision_spec = '1f'
    else:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            precision_spec = '1f'
        elif split_name in fsn_props:
            precision_spec = '0f'
        else:
            precision_spec = '2f'

    # The unit is the second line of "分级标准"; a single-line value simply
    # has no unit instead of raising IndexError.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤三普分级'
    ws.merge_cells('C1:D1'); ws['C1'] = '样点统计'
    ws.merge_cells('E1:F1'); ws['E1'] = '制图统计'

    ws['A2'] = '分级'; ws['B2'] = '值域/' + prop_unit if prop_unit else '值域'
    ws['C2'] = '数量/个'; ws['D2'] = '占比%'
    ws['E2'] = '面积/亩'; ws['F2'] = '占比%'

    # Value-range text per GRIDCODE. The i-th grade (1-based, in config
    # order) owns code i for its first range and code i + <number of grades>
    # for an optional second range.
    level_dict = prop_config['标准等级']
    level_count = len(level_dict)
    range_by_gridcode = {}
    for i, ranges in enumerate(level_dict.values(), 1):
        range_list = [r.strip() for r in ranges.split(',')]
        range_by_gridcode[i] = range_list[0]
        if len(range_list) >= 2:
            range_by_gridcode[i + level_count] = range_list[1]

    # --- Data rows: one row per GRIDCODE, grouped by grade label ---
    current_row = 3
    for yl, group_yl_df in df.groupby('属性分级', sort=False, observed=False):
        yl_start_row = current_row

        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = range_by_gridcode.get(row_data['GRIDCODE'], '-')

            sample_val = row_data.get('样点数', 0)
            sample_pct_val = row_data.get('样点数占比', 0)
            area_val = row_data.get('制图面积_平差后', 0)
            area_pct_val = row_data.get('面积占比', 0)

            # NaN (no samples / no mapped area) compares false and yields "-".
            ws.cell(row=current_row, column=3).value = f"{sample_val:.0f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=4).value = f"{sample_pct_val:.1f}" if sample_val > 0 else "-"
            ws.cell(row=current_row, column=5).value = f"{area_val:.0f}" if area_val > 0 else "-"
            ws.cell(row=current_row, column=6).value = f"{area_pct_val:.1f}" if area_val > 0 else "-"

            current_row += 1

        # Merge the grade label over the rows just written; an empty group
        # wrote nothing and must not produce an inverted range.
        if current_row > yl_start_row:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row-1, end_column=1)
            ws.cell(row=yl_start_row, column=1).value = yl

    # --- Totals row ---
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=2).value = '-'
    ws.cell(row=current_row, column=3).value = df['样点数'].sum()
    ws.cell(row=current_row, column=4).value = '100'
    ws.cell(row=current_row, column=5).value = f"{df['制图面积_平差后'].sum():.0f}"
    ws.cell(row=current_row, column=6).value = '100'

    # --- Mean / median / range rows, each spanning columns B..F ---
    ws.merge_cells(f'B{current_row + 1}:F{current_row + 1}')
    ws.cell(row=current_row + 1, column=1).value = '全区均值'
    ws.cell(row=current_row + 1, column=2).value = f'{stats["mean"]:.{precision_spec}}'

    ws.merge_cells(f'B{current_row + 2}:F{current_row + 2}')
    ws.cell(row=current_row + 2, column=1).value = '全区中位值'
    ws.cell(row=current_row + 2, column=2).value = f'{stats["median"]:.{precision_spec}}'

    ws.merge_cells(f'B{current_row + 3}:F{current_row + 3}')
    ws.cell(row=current_row + 3, column=1).value = '全区范围'
    ws.cell(row=current_row + 3, column=2).value = f'{stats["min"]:.{precision_spec}} ~ {stats["max"]:.{precision_spec}}'

    # --- Styling and column widths ---
    header_font = Font(name='宋体', size=11, bold=True)
    ExcelStyleUtils.set_style(ws, f'A1:F{current_row+3}')
    ExcelStyleUtils.set_style(ws, 'A1:F2', header_font)
    ExcelStyleUtils.auto_adjust_column_width(ws)

    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, reclassed_features_path, dltb_features, soil_prop_tif, output_path, target_area_dict,xzqmc, prop_config):
    """Produce the "<property>土壤分级分布.xlsx" report for one soil property.

    Intersects the land-use polygons with the reclassified grade polygons,
    tabulates the result against the "行政区划" zones, then hands the table to
    ``process_data_for_table1`` and ``write_to_excel_table1``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also the sample feature class name.
        reclassed_features_path: Polygons of the reclassified raster.
        dltb_features: Land-use polygons carrying ``YJDL_EJDL``.
        soil_prop_tif: Property raster, forwarded to the Excel writer.
        output_path: Directory receiving the workbook.
        target_area_dict: Target areas used for area adjustment.
        xzqmc: Name of the region being processed.
        prop_config: Grading configuration of the property.

    Errors are printed with a traceback and not re-raised; temporary data is
    cleaned up in every case.
    """
    import gc
    import traceback

    temp_out_feature_class = r"in_memory/temp_out_feature_class"
    temp_out_tables_area = r"in_memory/temp_out_tables_area"
    # Both intermediates are registered for cleanup; the intersect result was
    # previously left behind in the in_memory workspace.
    temp_files = [temp_out_feature_class, temp_out_tables_area]

    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土壤分级分布.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")
        # These regions are adjusted district by district.
        is_by_xzq = xzqmc in ("北海市", "来宾市", "楚雄自治州")

        # Intersect land-use polygons with the reclassified grade polygons.
        arcpy.analysis.Intersect(
            in_features=[dltb_features,reclassed_features_path],
            out_feature_class=temp_out_feature_class,
            join_attributes="ALL",
            output_type="INPUT"
        )
        # Tabulate the intersect result per administrative zone.
        arcpy.analysis.TabulateIntersection(
            in_zone_features="行政区划",
            zone_fields="XZQMC",
            in_class_features=temp_out_feature_class,
            out_table=temp_out_tables_area,
            class_fields="gridcode;YJDL_EJDL",
            out_units="SQUARE_METERS"
        )
        clipped_table_df = arcgis_utils.read_arcgis_table(temp_out_tables_area)

        final_dataframe,stat = process_data_for_table1(gdb_path, soil_prop_name, clipped_table_df, target_area_dict,xzqmc,is_by_xzq, prop_config)

        write_to_excel_table1(final_dataframe, output_excel_path, prop_config, soil_prop_tif, stat)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
315
tools/core/soil_prop_stats/B2_TRZD12土地利用类型土壤属性.py
Normal file
315
tools/core/soil_prop_stats/B2_TRZD12土地利用类型土壤属性.py
Normal file
@@ -0,0 +1,315 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
|
||||
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
|
||||
# Soil texture classes (12-class scheme)
|
||||
trzd_order = ['砂土及壤质砂土', '砂质壤土','壤土','粉(砂)质壤土','砂质黏壤土','黏壤土','粉(砂)质黏壤土','砂质黏土','壤质黏土','粉(砂)质黏土','黏土','重黏土']
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """Classify a merged cell range by the direction(s) it spans.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col``, or a falsy value for "not merged".

    Returns:
        'both' when the range spans several rows and columns, 'row' for
        several rows only, 'column' for several columns only, otherwise None.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    # A one-cell range is not really a merge.
    return None
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Build the land-use x texture-class table of sample counts and mapped areas.

    Args:
        gdb_path: Geodatabase path; samples are read from the feature class
            ``gdb_path/soil_prop_feature_name``.
        soil_prop_feature_name: Name of the sample feature class, which is
            also the name of its value field.
        df_dltb: Tabulated areas with the columns ``YJDL_EJDL``, ``gridcode``
            and ``AREA``. The frame is modified in place (columns added and
            upper-cased, incomplete rows dropped).
        target_areas_df: Target area per secondary land class, with an
            ``EJDL`` column (or index) and a ``面积`` column.

    Returns:
        DataFrame sorted by YJDL, EJDL and GRIDCODE (all categorical) holding
        样点数, 样点数占比 (fraction), 制图面积 (adjusted) and 面积占比 (fraction).
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        """Strip *columns*, turn null placeholders into NaN and drop those rows."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        # 'nan' is what astype(str) yields for a real NaN; without it such
        # rows would survive the dropna below.
        df.replace(['<Null>', 'None', '', '<空>', 'nan'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    def ordered_categories(preferred, values):
        """Return *preferred* followed by any other non-null value of *values*."""
        extras = [v for v in values.unique() if pd.notna(v) and v not in preferred]
        return preferred + extras

    # --- a. Sample points: count per land class and texture code ---
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False))
    df_samples = clean_df(df_samples, [field_name])

    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    df_samples["GRIDCODE"] = df_samples[field_name].astype(int)

    key_cols = ['YJDL', 'EJDL', 'GRIDCODE']
    df_sample_means = df_samples.groupby(key_cols).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # --- b. Mapped area per land class and texture code ---
    # The .str accessor yields NaN for values without a second segment
    # (dropped by clean_df below) instead of raising IndexError.
    land_parts = df_dltb['YJDL_EJDL'].str.split('_')
    df_dltb["YJDL"] = land_parts.str[0]
    df_dltb["EJDL"] = land_parts.str[1]
    df_dltb.columns = df_dltb.columns.str.upper()
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])

    df_map_data = df_dltb.groupby(["YJDL","EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # square metres -> mu

    # Normalise the target-area table so that EJDL is a column.
    target_areas_df = target_areas_df.copy()
    target_areas_df.columns = target_areas_df.columns.str.strip()
    if 'EJDL' not in target_areas_df.columns:
        target_areas_df = target_areas_df.reset_index()
        target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
    target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)

    # Adjustment factor per secondary land class = target / mapped total.
    ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
    ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ejdl_adj = pd.merge(ejdl_original_sum, target_areas_df, on='EJDL', how='left')
    ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a class: keep its mapped area (factor 1).
    ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
    # replace(0, 1) guards the division against empty classes.
    ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
    ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1)

    # Apply the factor to every texture class of the land class.
    df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YJDL', 'EJDL'])

    # --- c. Merge sample and map statistics on the union of their keys ---
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[key_cols],
        df_map_data[key_cols]
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=key_cols, how='left')
    df_final = pd.merge(df_final, df_map_data, on=key_cols, how='left')

    # Fixed report order first, unexpected classes appended. Doing this for
    # YJDL as well keeps unknown primary classes in the report; a bare
    # yjdl_order would turn them into NaN categories that groupby drops.
    in_yjdl_order = ordered_categories(yjdl_order, df_final['YJDL'])
    in_ejdl_order = ordered_categories(ejdl_order, df_final['EJDL'])
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=in_yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(key_cols, inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table2(df, output_path, prop_config):
    """Write the land-use x texture-class table to a formatted Excel workbook.

    Args:
        df: Output of ``process_data_for_table2``; read columns are YJDL,
            EJDL, GRIDCODE, 样点数, 样点数占比, 制图面积 and 面积占比.
        output_path: Path of the ``.xlsx`` file to create.
        prop_config: Property configuration; "标准等级" maps a texture name
            to its code, and is inverted here to label each GRIDCODE.

    Nothing is written when *df* is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Set font (and optionally alignment/border) on every cell of *cell_range*."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment: cell.alignment = alignment
                if border: cell.border = border

    def shown(value, factor=1, ndigits=None):
        """Return '-' for NaN, else *value* (scaled and rounded when *ndigits* is given)."""
        if np.isnan(value):
            return '-'
        if ndigits is None:
            return value
        return round(value * factor, ndigits)

    # (column, source column, scale factor, decimals) of the four figures.
    total_specs = (
        (4, '样点数', 1, 0),
        (5, '样点数占比', 100, 2),
        (6, '制图面积', 1, 0),
        (7, '面积占比', 100, 2),
    )

    def write_totals(row, frame):
        """Write the column sums of *frame* into *row*."""
        for col, name, factor, ndigits in total_specs:
            # min_count=1 keeps an all-missing column NaN so it shows "-";
            # a plain sum() would turn it into 0 and the NaN test never fired.
            total = frame[name].sum(min_count=1)
            ws.cell(row=row, column=col).value = shown(total, factor, ndigits)

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1'); ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1'); ws['F1'] = '制图统计'

    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # Invert the configuration so a code (as string) yields its texture name.
    level_dict = prop_config['标准等级']
    name_by_code = {value: key for key, value in level_dict.items()}

    # --- Data rows ---
    current_row = 3
    df_to_write = df.copy()

    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        # observed=False also yields categories without rows.
        if group_yl_df.empty:
            continue

        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row

        for ej, group_ej_df in group_yl_df.groupby('EJDL', sort=False, observed=False):
            if group_ej_df.empty:
                continue

            print(f"正在写入二级地类: {ej}...")
            ej_start_row = current_row

            # One row per texture code.
            for _, row_data in group_ej_df.iterrows():
                ws.cell(row=current_row, column=3).value = name_by_code.get(str(row_data['GRIDCODE']), '-')
                ws.cell(row=current_row, column=4).value = shown(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = shown(row_data['样点数占比'], 100, 2)
                ws.cell(row=current_row, column=6).value = shown(row_data['制图面积'], 1, 0)
                ws.cell(row=current_row, column=7).value = shown(row_data['面积占比'], 100, 2)
                current_row += 1

            # Merge the secondary-class label over the rows just written.
            if current_row > ej_start_row:
                ws.merge_cells(start_row=ej_start_row, start_column=2, end_row=current_row-1, end_column=2)
                ws.cell(row=ej_start_row, column=2).value = ej

        # Subtotal row of the primary class.
        ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
        ws.cell(row=current_row, column=2).value = '合计'
        write_totals(current_row, group_yl_df)

        # The primary-class label spans its rows including the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Grand total row ---
    ws.cell(row=current_row, column=1).value = '全区汇总'
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    write_totals(current_row, df_to_write)

    # --- Styling ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")

    # Widest content per column; CJK characters count 1.7x. Cells that belong
    # to a purely horizontal merge are ignored so that a wide title does not
    # stretch its first column.
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            if get_merge_type(merged_range) == 'column':
                continue
            text = str(cell.value)
            cell_len = 0.7 * len(re.findall('([\u4e00-\u9fa5])', text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
|
||||
# print(target_areas_df)
|
||||
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
|
||||
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
|
||||
# write_to_excel_table2(df,output_path,prop_config)
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Produce the "<property>土地利用类型土壤.xlsx" report for one soil property.

    Tabulates the reclassified texture polygons against the land-use
    polygons, then hands the table to ``process_data_for_table2`` and
    ``write_to_excel_table2``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Property name; also the sample feature class name.
        dltb_features: Land-use polygons carrying ``YJDL_EJDL``.
        reclassed_feature: Polygons of the reclassified raster (``gridcode``).
        output_path: Directory receiving the workbook.
        target_areas_df: Target area per secondary land class.
        prop_config: Grading configuration of the property.

    Errors are printed with a traceback and not re-raised; temporary data is
    cleaned up in every case.
    """
    import gc
    import traceback

    out_table_mean = r"in_memory/out_table_mean"
    temp_files = [out_table_mean]

    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")

        # Always recompute: the table name is fixed, so a table left over
        # from an earlier run (e.g. after a failed cleanup) would otherwise
        # be reused for different inputs.
        arcpy.analysis.TabulateIntersection(dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean, "gridcode", out_units="SQUARE_METERS")

        dltb_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"]))

        # Build and write table 2 (land-use type x soil texture).
        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)

        write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
|
||||
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
|
||||
# write_to_excel_table2(df,output_path)
|
||||
336
tools/core/soil_prop_stats/B2_TRZD土地利用类型土壤属性.py
Normal file
336
tools/core/soil_prop_stats/B2_TRZD土地利用类型土壤属性.py
Normal file
@@ -0,0 +1,336 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
|
||||
from matplotlib.artist import get
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
yjdl_order = ["耕地", "园地", "林地", "草地", "其他"]
|
||||
ejdl_order = ["水田", "旱地", "水浇地", "果园", "茶园", "橡胶园", "其他园地"]
|
||||
# Soil texture classes (5-class scheme)
|
||||
trzd_order = ['黏壤质','黏质','壤质','砂壤质','砂质']
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
def get_prop_level(prop_level):
    """Translate a texture code (or texture name) into its texture-class name.

    Args:
        prop_level: The class, given as a number, a numeric string, or one
            of the class names themselves.

    Returns:
        "砂质", "砂壤质", "壤质", "黏壤质" or "黏质"; "-" when the input is
        missing, zero or not part of the table.
    """
    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    names_by_code = {
        5: "砂质",
        4: "砂壤质",
        3: "壤质",
        1: "黏壤质",
        2: "黏质",
    }

    # A class name maps to itself. It must be tested before int(), which
    # raises ValueError on such text and previously made names unusable.
    if isinstance(prop_level, str) and prop_level in names_by_code.values():
        return prop_level

    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return names_by_code.get(code, "-")
|
||||
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """Tell in which direction(s) a merged cell range extends.

    Args:
        merged_range: Object exposing ``min_row``, ``max_row``, ``min_col``
            and ``max_col``, or a falsy value for "not merged".

    Returns:
        'row' (several rows), 'column' (several columns), 'both', or None
        when the range covers a single cell or no range was given.
    """
    if not merged_range:
        return None

    first_row, last_row = merged_range.min_row, merged_range.max_row
    first_col, last_col = merged_range.min_col, merged_range.max_col

    multi_row = bool(last_row > first_row)
    multi_col = bool(last_col > first_col)

    # (spans rows, spans columns) -> label; a single cell has no entry.
    labels = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    return labels.get((multi_row, multi_col))
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine sample counts and adjusted mapped areas per land use and texture class.

    Args:
        gdb_path: Geodatabase holding the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is read as the attribute field holding the class value.
        df_dltb: Intersection table with the columns ``YJDL_EJDL``
            ("<first level>_<second level>"), ``gridcode`` and ``AREA``.
            It is copied, so the caller's frame is left unchanged.
        target_areas_df: Target area per second-level class, with ``EJDL``
            as a column or as the index, and a ``面积`` column.

    Returns:
        DataFrame with one row per (YJDL, EJDL, GRIDCODE), sorted, carrying
        ``样点数``, ``样点数占比``, ``制图面积`` and ``面积占比`` among others.
        YJDL values outside ``yjdl_order`` become missing categories.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        """Normalise ``columns`` to stripped text and drop rows missing any of them."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        # 'nan' is what astype(str) produces for a real NaN; without it
        # null rows would survive the dropna below.
        df.replace(['<Null>', 'None', '', '<空>', 'nan'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count samples per land use and class ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(
        arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False)
    )
    df_samples = clean_df(df_samples, [field_name])

    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    df_samples["GRIDCODE"] = df_samples[field_name]

    df_sample_means = (
        df_samples.groupby(['YJDL', 'EJDL', 'GRIDCODE']).size().reset_index(name="样点数")
    )
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped data: area per land use and class ==
    df_dltb = df_dltb.copy()  # do not modify the caller's frame
    zone_parts = df_dltb['YJDL_EJDL'].astype(str).str.split('_')
    df_dltb["YJDL"] = zone_parts.str[0]
    # .str[1] yields NaN when there is no second part; clean_df drops such rows.
    df_dltb["EJDL"] = zone_parts.str[1]
    df_dltb.columns = df_dltb.columns.str.upper()
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_dltb['GRIDCODE'] = df_dltb['GRIDCODE'].apply(get_prop_level)

    df_map_data = df_dltb.groupby(["YJDL", "EJDL", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu (亩)

    # Normalise the target-area table.
    target_areas_df = target_areas_df.copy()
    target_areas_df.columns = target_areas_df.columns.str.strip()
    if 'EJDL' not in target_areas_df.columns:
        # EJDL was supplied as the index.
        target_areas_df = target_areas_df.reset_index()
        target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
    target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce').fillna(0)
    # One row per EJDL, so the merges below cannot multiply mapped rows.
    target_per_ejdl = target_areas_df.groupby('EJDL', as_index=False, observed=True)['面积'].sum()

    # Adjustment factor per second-level class = target area / mapped area.
    ejdl_original_sum = df_map_data.groupby('EJDL')['制图面积_原始'].sum().reset_index()
    ejdl_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ejdl_adj = pd.merge(ejdl_original_sum, target_per_ejdl, on='EJDL', how='left')
    ejdl_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a class: keep its mapped area (factor 1).
    ejdl_adj['目标合计面积'] = ejdl_adj['目标合计面积'].fillna(ejdl_adj['原始合计面积'])
    # replace(0, 1) avoids dividing by zero.
    ejdl_adj['平差系数'] = ejdl_adj['目标合计面积'] / ejdl_adj['原始合计面积'].replace(0, 1)
    ejdl_adj['平差系数'] = ejdl_adj['平差系数'].fillna(1)

    # Apply the factor to every class row and recompute the shares.
    df_map_data = pd.merge(df_map_data, ejdl_adj[['EJDL', '平差系数']], on='EJDL', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YJDL', 'EJDL'])

    # == c. Merge both sides on the union of their keys ==
    print("--> 步骤3: 合并数据...")
    keys = ['YJDL', 'EJDL', 'GRIDCODE']
    df_skeleton = pd.concat(
        [df_sample_means[keys], df_map_data[keys]]
    ).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=keys, how='left')
    df_final = pd.merge(df_final, df_map_data, on=keys, how='left')

    # Sort by the configured land-use order; unknown EJDL values go last.
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(
        df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True
    )
    df_final.sort_values(keys, inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table2(df, output_path, prop_config):
    """Write the land-use by texture-class table to a formatted Excel file.

    Args:
        df: Table with the columns ``YJDL``, ``EJDL``, ``GRIDCODE``,
            ``样点数``, ``样点数占比``, ``制图面积`` and ``面积占比``.
        output_path: Path of the .xlsx file to write.
        prop_config: Accepted for interface compatibility; not read here.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_border = Border(left=Side(style='thin'), right=Side(style='thin'),
                         top=Side(style='thin'), bottom=Side(style='thin'))

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment and border to a range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def or_dash(value, digits=None):
        """Return ``value`` (rounded when ``digits`` is given), or '-' for NaN."""
        if np.isnan(value):
            return '-'
        return value if digits is None else round(value, digits)

    def write_stats(row, count, count_share, area, area_share, count_digits=None):
        """Fill columns D-G of ``row``; shares are written as percentages."""
        ws.cell(row=row, column=4).value = or_dash(count, count_digits)
        ws.cell(row=row, column=5).value = or_dash(count_share * 100, 2)
        ws.cell(row=row, column=6).value = or_dash(area, 0)
        ws.cell(row=row, column=7).value = or_dash(area_share * 100, 2)

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'

    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()

    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        # observed=False also yields categories without rows.
        if group_yl_df.empty:
            continue

        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row

        for ej, group_ej_df in group_yl_df.groupby('EJDL', sort=False, observed=False):
            if group_ej_df.empty:
                continue

            print(f"正在写入二级地类: {ej}...")
            ej_start_row = current_row

            for _, row_data in group_ej_df.iterrows():
                ws.cell(row=current_row, column=3).value = str(row_data['GRIDCODE'])
                write_stats(current_row, row_data['样点数'], row_data['样点数占比'],
                            row_data['制图面积'], row_data['面积占比'])
                current_row += 1

            # Merge column B only when the group spans several rows; a
            # one-cell merge adds nothing.
            if current_row - 1 > ej_start_row:
                ws.merge_cells(start_row=ej_start_row, start_column=2,
                               end_row=current_row - 1, end_column=2)
            ws.cell(row=ej_start_row, column=2).value = ej

        # Subtotal row of the first-level class.
        ws.merge_cells(start_row=current_row, start_column=2, end_row=current_row, end_column=3)
        ws.cell(row=current_row, column=2).value = '合计'
        write_stats(current_row,
                    group_yl_df['样点数'].sum(), group_yl_df['样点数占比'].sum(),
                    group_yl_df['制图面积'].sum(), group_yl_df['面积占比'].sum(),
                    count_digits=0)

        # Column A covers the data rows and the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl
        current_row += 1

    # --- Grand total row ---
    ws.cell(row=current_row, column=1).value = '全区汇总'
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    write_stats(current_row,
                df_to_write['样点数'].sum(), df_to_write['样点数占比'].sum(),
                df_to_write['制图面积'].sum(), df_to_write['面积占比'].sum(),
                count_digits=0)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")

    cjk_char = re.compile('([\u4e00-\u9fa5])')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                merged = next(
                    (rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None
                )
                # Text merged across columns must not widen a single column.
                if get_merge_type(merged) == 'column':
                    continue
                text = str(cell.value)
                # CJK characters are wider, so they count 1.7 each.
                cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)
    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
# def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
|
||||
# print(target_areas_df)
|
||||
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
|
||||
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
|
||||
# write_to_excel_table2(df,output_path,prop_config)
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, dltb_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """Tabulate class areas per land-use zone and write the Excel report.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace and as the source
            of the sample feature class.
        soil_prop_name: Property name; also used in the output file name.
        dltb_features: Zone features carrying the ``YJDL_EJDL`` field.
        reclassed_feature: Class features carrying the ``gridcode`` field.
        output_path: Directory in which the workbook is written.
        target_areas_df: Target areas passed to the processing step.
        prop_config: Configuration passed to the Excel writer.

    Returns:
        None. Any exception is printed with its traceback and not
        re-raised; temporary data is cleaned up in every case.
    """
    import gc
    import traceback

    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")

        out_table_mean = r"in_memory/out_table_mean"
        temp_files.append(out_table_mean)
        # Always rebuild the table: one left over from an earlier run belongs
        # to other inputs and must not be reused.
        if arcpy.Exists(out_table_mean):
            arcpy.management.Delete(out_table_mean)
        arcpy.analysis.TabulateIntersection(
            dltb_features, "YJDL_EJDL", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS"
        )

        dltb_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "gridcode", "AREA"])
        )

        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# df = pd.read_csv(r"D:\ProgramData\ArcGis_Py\测试数据.csv")
|
||||
# output_path = r"E:\@三普属性图出图\测试\AAA.xlsx"
|
||||
# write_to_excel_table2(df,output_path)
|
||||
328
tools/core/soil_prop_stats/B2土地利用类型土壤属性.py
Normal file
328
tools/core/soil_prop_stats/B2土地利用类型土壤属性.py
Normal file
@@ -0,0 +1,328 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_ejdl, calculate_yjdl
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
from tools.core.utils.excel_utils import ExcelStyleUtils
|
||||
|
||||
|
||||
# Display order of the first-level land-use classes (YJDL) in the report.
yjdl_order = [
    "耕地",
    "园地",
    "林地",
    "草地",
    "其他",
]

# Preferred display order of the second-level land-use classes (EJDL).
ejdl_order = [
    "水田", "旱地", "水浇地",
    "果园", "茶园", "橡胶园", "其他园地",
]
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table2(gdb_path, soil_prop_feature_name, df_dltb, target_areas_df):
    """Combine sample statistics, mapped means and target areas per land use.

    Args:
        gdb_path: Geodatabase holding the sample feature class.
        soil_prop_feature_name: Name of the sample feature class; the same
            name is read as the attribute field holding the measured value.
        df_dltb: Zonal-statistics table with the columns ``YJDL_EJDL``
            ("<first level>_<second level>"), ``MEAN`` and ``COUNT``.
            It is copied, so the caller's frame is left unchanged.
        target_areas_df: Target area per second-level class, with ``EJDL``
            as a column or as the index, and a ``面积`` column.

    Returns:
        DataFrame with one row per (YJDL, EJDL), sorted, carrying the
        sample ``count``/``max``/``min``/``mean``, ``制图均值``,
        ``制图样点数`` and ``面积``. YJDL values outside ``yjdl_order``
        become missing categories.
    """
    print("开始处理数据...")

    def clean_df(df, columns):
        """Normalise ``columns`` to stripped text and drop rows missing any of them."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        # 'nan' is what astype(str) produces for a real NaN; without it
        # null rows would survive the dropna below.
        df.replace(['<Null>', 'None', '', '<空>', 'nan'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count, max, min and mean per land use ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_feature_name
    sample_table_path = os.path.join(gdb_path, soil_prop_feature_name)
    sample_fields = ['TDLYLX', field_name]
    df_samples = pd.DataFrame(
        arcpy.da.FeatureClassToNumPyArray(sample_table_path, sample_fields, skip_nulls=False)
    )
    df_samples = clean_df(df_samples, [field_name])

    df_samples["YJDL"] = df_samples['TDLYLX'].apply(calculate_yjdl)
    df_samples["EJDL"] = df_samples['TDLYLX'].apply(calculate_ejdl)
    df_samples[field_name] = df_samples[field_name].astype(float)

    df_sample_means = (
        df_samples.groupby(['YJDL', 'EJDL'])[field_name]
        .agg(['count', 'max', 'min', 'mean'])
        .reset_index()
    )

    # == b. Mapped data: zonal mean and cell count per land use ==
    df_dltb = df_dltb.copy()  # do not modify the caller's frame
    zone_parts = df_dltb['YJDL_EJDL'].astype(str).str.split('_')
    df_dltb["YJDL"] = zone_parts.str[0]
    # .str[1] yields NaN when there is no second part; clean_df drops such rows.
    df_dltb["EJDL"] = zone_parts.str[1]
    df_dltb = clean_df(df_dltb, ['YJDL', 'EJDL'])
    df_dltb.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)

    # Normalise the target-area table the same way the texture report does.
    target_areas_df = target_areas_df.copy()
    target_areas_df.columns = target_areas_df.columns.str.strip()
    if 'EJDL' not in target_areas_df.columns:
        # EJDL was supplied as the index.
        target_areas_df = target_areas_df.reset_index()
        target_areas_df.rename(columns={'index': 'EJDL'}, inplace=True)
    if '面积' in target_areas_df.columns:
        # Missing areas stay NaN so the writer can show them as '-'.
        target_areas_df['面积'] = pd.to_numeric(target_areas_df['面积'], errors='coerce')

    # == c. Merge all sides on the union of the land-use keys ==
    print("--> 步骤3: 合并数据...")
    keys = ['YJDL', 'EJDL']
    df_skeleton = pd.concat(
        [df_sample_means[keys], df_dltb[keys]]
    ).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=keys, how='left')
    df_final = pd.merge(df_final, df_dltb, on=keys, how='left')
    df_final = pd.merge(df_final, target_areas_df, on=['EJDL'], how='left')

    # Sort by the configured land-use order; unknown EJDL values go last.
    in_ejdl_order = ejdl_order + [x for x in df_final['EJDL'].unique() if x not in ejdl_order]
    df_final["YJDL"] = pd.Categorical(df_final['YJDL'], categories=yjdl_order, ordered=True)
    df_final["EJDL"] = pd.Categorical(df_final['EJDL'], categories=in_ejdl_order, ordered=True)
    df_final.sort_values(keys, inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table2(df, output_path, prop_config:dict, soil_prop_name: str = ''):
    """Write the land-use soil-property statistics to a formatted Excel file.

    Args:
        df: Table with the columns ``YJDL``, ``EJDL``, ``count``, ``max``,
            ``min``, ``mean``, ``制图均值``, ``制图样点数`` and ``面积``.
        output_path: Path of the .xlsx file to write.
        prop_config: Configuration; ``项目分级`` (first line = property
            name) selects the number of decimals and ``分级标准`` (second
            line = unit) supplies the unit shown in the header.
        soil_prop_name: Property code; for the codes in ``filtered_props``
            the mapped columns of 林地/草地/其他 are blanked and the
            district row uses 耕地 and 园地 only.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土地利用类型属性变化统计"

    # Number format (decimals) chosen from the property name.
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'

    # The unit is the second line of '分级标准'; empty when there is none.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt(value, spec=prop_name):
        """Format a number with ``spec``; '-' when the value is missing."""
        return f"{value:.{spec}}" if pd.notna(value) else "-"

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土地利用类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'

    ws['A2'] = '一级'
    ws['B2'] = '二级'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '范围/' + prop_unit
    ws['E2'] = '数量/个'
    ws['F2'] = '均值/' + prop_unit
    ws['G2'] = '面积/亩'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()

    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']

    for yl, group_yl_df in df_to_write.groupby('YJDL', sort=False, observed=False):
        # observed=False also yields categories without rows; skip them so
        # no subtotal row is written for a class that has no data.
        if group_yl_df.empty:
            continue

        print(f"正在写入一级地类: {yl}...")
        yl_start_row = current_row

        # One row per second-level class.
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['EJDL']

            # Sample statistics.
            sample_mean = row_data.get('mean')
            if pd.notna(sample_mean):
                ws.cell(row=current_row, column=3).value = fmt(sample_mean)
                ws.cell(row=current_row, column=4).value = (
                    f"{row_data.get('min', '-'):.{prop_name}}~{row_data.get('max', '-'):.{prop_name}}"
                )
                ws.cell(row=current_row, column=5).value = row_data.get('count', '-')
            else:
                ws.cell(row=current_row, column=3).value = "-"
                ws.cell(row=current_row, column=4).value = "-"
                ws.cell(row=current_row, column=5).value = "-"

            # Mapped statistics; a missing area is shown as '-'.
            map_mean = row_data.get('制图均值')
            if pd.notna(map_mean):
                ws.cell(row=current_row, column=6).value = fmt(map_mean)
                ws.cell(row=current_row, column=7).value = fmt(row_data.get('面积'), '0f')
            else:
                ws.cell(row=current_row, column=6).value = "-"
                ws.cell(row=current_row, column=7).value = "-"

            current_row += 1

        # Groups whose last second-level class is 林地/草地/其他 get no
        # subtotal row: columns A:B of their first row are merged instead.
        last_ejdl = group_yl_df['EJDL'].iloc[-1]
        if last_ejdl in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl

            if soil_prop_name in filtered_props:
                ws.cell(row=yl_start_row, column=6).value = "-"
                ws.cell(row=yl_start_row, column=7).value = "-"

            continue

        ws.cell(row=current_row, column=2).value = '合计'

        # Subtotal of the sample side: mean weighted by sample count.
        total_count = group_yl_df['count'].sum()
        sample_products = group_yl_df['mean'] * group_yl_df['count']
        if not sample_products.empty and total_count != 0:
            total_sample_mean = sample_products.sum() / total_count
        else:
            total_sample_mean = None
        min_min, max_max = group_yl_df['min'].min(), group_yl_df['max'].max()

        if pd.notna(total_sample_mean):
            ws.cell(row=current_row, column=3).value = fmt(total_sample_mean)
            ws.cell(row=current_row, column=4).value = f"{min_min:.{prop_name}}~{max_max:.{prop_name}}"
            ws.cell(row=current_row, column=5).value = f"{total_count:.0f}"
        else:
            ws.cell(row=current_row, column=3).value = "-"
            ws.cell(row=current_row, column=4).value = "-"
            ws.cell(row=current_row, column=5).value = "-"

        # Subtotal of the mapped side: mean weighted by '制图样点数'.
        map_weighted_sum = 0
        map_total_count = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            # Only rows with both a mean and a positive count take part.
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                map_weighted_sum += mean_val * count_val
                map_total_count += count_val

        weighted_avg = (map_weighted_sum / map_total_count) if map_total_count > 0 else 0
        total_area = group_yl_df['面积'].sum()

        if weighted_avg > 0:
            ws.cell(row=current_row, column=6).value = fmt(weighted_avg)
            ws.cell(row=current_row, column=7).value = f"{total_area:.0f}"
        else:
            ws.cell(row=current_row, column=6).value = "-"
            ws.cell(row=current_row, column=7).value = "-"

        # Column A covers the data rows and the subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

        current_row += 1

    # --- District-wide row ---
    if soil_prop_name in filtered_props:
        # Only 耕地 and 园地 take part in the district statistics.
        df_for_total = df_to_write[df_to_write['YJDL'].isin(['耕地', '园地'])].copy()
        print(f"全区统计过滤:仅基于耕地和园地(YJDL in ['耕地', '园地'])")
    else:
        df_for_total = df_to_write.copy()

    total_counts = df_for_total['count'].sum()
    if total_counts > 0:
        total_mean = (df_for_total['mean'] * df_for_total['count']).sum() / total_counts
    else:
        total_mean = None

    if not df_for_total.empty:
        overall_min = df_for_total['min'].min()
        overall_max = df_for_total['max'].max()
        if pd.notna(overall_min) and pd.notna(overall_max):
            total_range = f"{overall_min:.{prop_name}}~{overall_max:.{prop_name}}"
        else:
            total_range = "-"

        # Area-weighted mapped mean. The weights are restricted to rows that
        # have both a mean and an area; otherwise areas without a mean would
        # inflate the denominator and bias the mean low.
        usable = df_for_total['制图均值'].notna() & df_for_total['面积'].notna()
        weight_area = df_for_total.loc[usable, '面积'].sum()
        if weight_area > 0:
            total_zhitu_mean = (
                df_for_total.loc[usable, '制图均值'] * df_for_total.loc[usable, '面积']
            ).sum() / weight_area
        else:
            total_zhitu_mean = None
        # The displayed total area still covers every row.
        total_areas = df_for_total['面积'].sum()
    else:
        total_range = "-"
        total_zhitu_mean = None
        total_areas = 0

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = fmt(total_mean)
    ws.cell(row=current_row, column=4).value = total_range
    ws.cell(row=current_row, column=5).value = f"{total_counts:.0f}" if total_counts > 0 else "-"
    ws.cell(row=current_row, column=6).value = fmt(total_zhitu_mean)
    ws.cell(row=current_row, column=7).value = f"{total_areas:.0f}" if total_areas > 0 else "-"

    # --- d. Styles and column widths ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, dltb_features, soil_prop_tif, output_path,target_areas_df, prop_config):
    """Compute zonal means per land-use zone and write the Excel report.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace and as the source
            of the sample feature class.
        soil_prop_name: Property name; also used in the output file name.
        dltb_features: Zone features carrying the ``YJDL_EJDL`` field.
        soil_prop_tif: Raster whose values are averaged per zone.
        output_path: Directory in which the workbook is written.
        target_areas_df: Target areas passed to the processing step.
        prop_config: Configuration passed to the Excel writer.

    Returns:
        None. Any exception is printed with its traceback and not
        re-raised; temporary data is cleaned up in every case.
    """
    import gc
    import traceback

    temp_files = []
    try:
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}土地利用类型土壤.xlsx")

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")

        out_table_mean = r"in_memory/out_table_mean"
        temp_files.append(out_table_mean)
        # Always rebuild the table: one left over from an earlier run belongs
        # to other inputs and must not be reused.
        if arcpy.Exists(out_table_mean):
            arcpy.management.Delete(out_table_mean)
        arcpy.sa.ZonalStatisticsAsTable(
            dltb_features, "YJDL_EJDL", soil_prop_tif, out_table_mean, "DATA", "MEAN"
        )

        dltb_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YJDL_EJDL", "MEAN", "COUNT"])
        )

        final_dataframe = process_data_for_table2(gdb_path, soil_prop_name, dltb_df, target_areas_df)
        write_to_excel_table2(final_dataframe, output_excel_path, prop_config, soil_prop_name)
    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
446
tools/core/soil_prop_stats/B3_TRZD12不同土壤类型土壤属性.py
Normal file
446
tools/core/soil_prop_stats/B3_TRZD12不同土壤类型土壤属性.py
Normal file
@@ -0,0 +1,446 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
|
||||
|
||||
from tools.config.custom_sort import yl_order, ts_order
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """Tell in which direction(s) a merged-cell range extends.

    Args:
        merged_range: An object exposing ``min_row``, ``max_row``,
            ``min_col`` and ``max_col``, or a falsy value when the cell is
            not part of a merge.

    Returns:
        'both' for a range covering several rows and several columns,
        'row' for several rows only, 'column' for several columns only,
        and None for a falsy argument or a one-cell range.
    """
    if not merged_range:
        return None

    row_span = merged_range.max_row - merged_range.min_row
    col_span = merged_range.max_col - merged_range.min_col

    # Keyed by (covers several rows, covers several columns); a one-cell
    # range has no entry and therefore yields None.
    kinds = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    return kinds.get((row_span > 0, col_span > 0))
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """Combine sample counts and adjusted mapped areas per soil type and class.

    Note:
        ``df_trlx_sample``, ``df_trlx_zhitu`` and ``df_trlx`` are cleaned
        and extended in place (rows dropped, columns added or renamed).

    Args:
        soil_prop_name: Name of the sample column holding the class code.
        df_trlx_sample: Sample table with the columns ``YL``, ``TS`` and
            ``soil_prop_name``.
        df_trlx_zhitu: Intersection table with the columns ``YL_TS``
            ("<YL>_<TS>"), ``gridcode`` and ``AREA``.
        df_trlx: Soil-type polygons with the columns ``YL``, ``TS`` and
            ``Shape@Area``.
        target_areas_df: Table whose ``面积`` column sums to the total area
            the soil-type polygons are scaled to.

    Returns:
        DataFrame with one row per (YL, TS, GRIDCODE), sorted, carrying
        ``样点数``, ``样点数占比``, ``制图面积`` and ``面积占比`` among others.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Normalise ``columns`` to stripped text and drop rows missing any of them."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        # 'nan' is what astype(str) produces for a real NaN; without it
        # null rows would survive the dropna below.
        df.replace(['<Null>', 'None', '', '<空>', 'nan'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    # == a. Sample points: count samples per soil type and class ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample, sample_fields)
    # The values are text after clean_df; to_numeric also accepts '3.0',
    # on which a direct astype(int) raises ValueError.
    df_samples["GRIDCODE"] = pd.to_numeric(df_samples[field_name]).astype(int)

    # Parent rock / parent material derived from TS. Not used further down;
    # NOTE(review): kept because the frame is modified in place and callers
    # may read these columns - confirm before removing.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)

    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped data: area per soil type and class ==
    zone_parts = df_trlx_zhitu['YL_TS'].astype(str).str.split('_')
    df_trlx_zhitu["YL"] = zone_parts.str[0]
    # .str[1] yields NaN when there is no second part; clean_df drops such rows.
    df_trlx_zhitu["TS"] = zone_parts.str[1]
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, ['YL', 'TS'])

    df_map_data = df_trlx_zhitu.groupby(["YL", "TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu (亩)

    # == c. Target area per TS: polygon areas scaled to the target total ==
    df_trlx = clean_df(df_trlx, ['YL', 'TS'])
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015

    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    adjusted_area_yz = target_areas / original_area
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    # Aggregate by TS alone: the tables below are keyed by TS, and a
    # (YL, TS) grouping would duplicate mapped rows for a TS under several YL.
    df_trlx_area = df_trlx.groupby('TS', as_index=False)['面积'].sum()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # Adjustment factor per TS = target area / mapped area.
    ts_original_sum = df_map_data.groupby('TS')['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)
    ts_adj = pd.merge(ts_original_sum, df_trlx_area, on='TS', how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target for a TS: keep its mapped area (factor 1).
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) avoids dividing by zero.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # Apply the factor to every class row and recompute the shares.
    df_map_data = pd.merge(df_map_data, ts_adj[['TS', '平差系数']], on='TS', how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']
    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, ['YL', 'TS'])

    # == d. Merge both sides on the union of their keys ==
    print("--> 步骤3: 合并数据...")
    keys = ['YL', 'TS', 'GRIDCODE']
    df_skeleton = pd.concat(
        [df_sample_means[keys], df_map_data[keys]]
    ).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=keys, how='left')
    df_final = pd.merge(df_final, df_map_data, on=keys, how='left')

    # Sort by the configured YL/TS order; unknown values go last.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(
        df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True
    )
    df_final.sort_values(keys, inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
# return df_final, df_sample_mymz
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table3(df, output_path, prop_config:dict):
    """
    Write the per-soil-type texture statistics to a formatted Excel workbook.

    One data row is written per record of ``df``. Rows are grouped by 'YL'
    and then by 'TS'; each group label is written once and merged vertically
    over the rows of its group. A final "全区" row holds the column totals.

    Args:
        df: DataFrame with the columns 'YL', 'TS', 'GRIDCODE', '样点数',
            '样点数占比', '制图面积' and '面积占比'.
        output_path: Path of the .xlsx file to create.
        prop_config: Configuration dict. Its '标准等级' mapping is inverted
            (value -> key) and used to translate ``str(GRIDCODE)`` into the
            label shown in the third column ('-' when there is no match).

    Returns:
        None. Nothing is written when ``df`` is empty.

    Raises:
        KeyError: If ``prop_config`` has no '标准等级' entry.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_side = Side(style='thin')
    thin_border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell in the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def cell_value(raw, factor=1, digits=None):
        """Return '-' for a missing value, else the value (optionally scaled and rounded)."""
        # pd.isna also covers None / pd.NA, on which np.isnan raises TypeError.
        if pd.isna(raw):
            return '-'
        if digits is None:
            return raw
        return round(raw * factor, digits)

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'

    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # Inverted '标准等级' mapping: configured value -> configured key.
    level_dict = prop_config['标准等级']
    label_by_code = {value: key for key, value in level_dict.items()}

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()

    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue

        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row

        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            # observed=False yields every TS category, most of them empty here.
            if group_ts_df.empty:
                continue

            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row

            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = label_by_code.get(str(row_data['GRIDCODE']), '-')
                # Sample-point statistics
                ws.cell(row=current_row, column=4).value = cell_value(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = cell_value(row_data['样点数占比'], 100, 2)
                # Mapping statistics
                ws.cell(row=current_row, column=6).value = cell_value(row_data['制图面积'], 1, 0)
                ws.cell(row=current_row, column=7).value = cell_value(row_data['面积占比'], 100, 2)
                current_row += 1

            # Merge the TS label only when the group spans several rows.
            if current_row - ts_start_row > 1:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts

        # Nothing to label if no TS group produced a row.
        if current_row == yl_start_row:
            continue
        if current_row - yl_start_row > 1:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- Totals row ---
    total_areas = df_to_write['制图面积'].sum()

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum() * 100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum() * 100, 2)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")

    cjk_char = re.compile('([\u4e00-\u9fa5])')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            # A horizontally merged label must not widen its first column.
            if get_merge_type(merged_range) == 'column':
                continue
            text = str(cell.value)
            # Each CJK character is counted as 1.7 characters of width.
            cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)

    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
# 母岩母质表
|
||||
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """
    Write the parent-material / parent-rock sample statistics to an Excel workbook.

    ``df`` is aggregated by ('母质', '母岩'): the 'TZ' names are joined with
    commas, 'mean' is averaged and 'count' is summed. One row is written per
    ('母质', '母岩') pair; the '母质' label is written once per group and merged
    over the group's rows. Groups whose '母质' equals '未知' are skipped.

    Args:
        df: DataFrame with the columns '母质', '母岩', 'TZ', 'mean' and 'count'.
        output_path: Path of the .xlsx file to create.
        prop_config: Configuration dict; the second line of its '分级标准'
            string is used as the unit shown in the header (empty when the
            entry is missing or has a single line).

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Unit = second line of '分级标准'; tolerate a missing or single-line value.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_side = Side(style='thin')
    thin_border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell in the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header (two rows) ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')
    ws.merge_cells('C1:C2')
    ws.merge_cells('D1:E1')

    current_row = 3

    # NOTE(review): 'mean' is an unweighted mean of the incoming per-row means;
    # confirm whether it should be weighted by 'count'.
    grouped = df.groupby(['母质', '母岩']).agg({
        # dropna/astype(str) keeps the join from failing on non-string entries.
        'TZ': lambda names: ','.join(names.dropna().astype(str)),
        'mean': 'mean',
        'count': 'sum',
    }).reset_index()

    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue

        first_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]

        # The label goes on the first row of the group only.
        ws.cell(row=first_row, column=1, value=parent_material)

        for _, row_data in material_group.iterrows():
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1

        # Merge the label only when the group spans several rows.
        if current_row - first_row > 1:
            ws.merge_cells(start_row=first_row, start_column=1, end_row=current_row - 1, end_column=1)

    # Closing row: only the label is written, no totals are computed here.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- c. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    for column_letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[column_letter].width = width

    # --- d. Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """
    Build the "soil property by soil type" Excel report for one property.

    Steps: spatially join the sample points (feature class ``soil_prop_name``
    inside ``gdb_path``) onto the soil-type polygons, tabulate the
    intersection of the soil-type polygons with the reclassified layer, load
    the results into DataFrames, then call ``process_data_for_table3`` and
    ``write_to_excel_table3``.

    Any exception is printed together with its traceback and not re-raised.
    The temporary in_memory datasets are always handed to
    ``temp_files_processor.clean_up_temp_files``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Name of the sample feature class and of its value field.
        trlx_features: Soil-type polygons with the fields YL, TS, TZ and YL_TS.
        reclassed_feature: Reclassified layer carrying a ``gridcode`` field.
        output_path: Directory that receives the Excel report.
        target_areas_df: Passed through to ``process_data_for_table3``.
        prop_config: Passed through to ``write_to_excel_table3``.
    """
    temp_files = []
    try:
        # --- 1. Output paths and inputs ---
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path, soil_prop_name)

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # --- 2. Spatial join: sample points -> soil-type polygons ---
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }

        field_mappings = arcpy.FieldMappings()
        for source_dataset, wanted_fields in fields_to_keep.items():
            for wanted_field in wanted_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(source_dataset, wanted_field)
                    field_map.mergeRule = "First"
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # A field that cannot be mapped is reported and skipped.
                    print(f"警告: 添加字段 '{wanted_field}' (来自 '{source_dataset}') 时出错,将跳过。错误信息: {e}")

        arcpy.analysis.SpatialJoin(
            soil_prop_features, trlx_features, temp_out_features,
            "JOIN_ONE_TO_ONE", "KEEP_ALL", field_mappings, "INTERSECT",
        )

        # --- 3. Area of each gridcode class inside each YL_TS zone ---
        arcpy.analysis.TabulateIntersection(
            trlx_features, "YL_TS", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS",
        )

        trlx_zhitu_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"])
        )
        trlx_sample_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name])
        )
        # Polygon areas of the soil-type layer
        trlx_area_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"])
        )

        # --- 4. Aggregate and write the report ---
        final_dataframe = process_data_for_table3(
            soil_prop_name, trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df
        )
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config)

    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
465
tools/core/soil_prop_stats/B3_TRZD不同土壤类型土壤属性.py
Normal file
465
tools/core/soil_prop_stats/B3_TRZD不同土壤类型土壤属性.py
Normal file
@@ -0,0 +1,465 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, Border, Side, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
|
||||
|
||||
from tools.config.custom_sort import yl_order, ts_order
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
|
||||
|
||||
# --- 2. 辅助函数 ---
|
||||
def get_prop_level(prop_level):
    """
    Translate a texture class code (or name) into its texture class name.

    Accepts the numeric codes 5, 4, 3, 1, 2 (as numbers or numeric strings)
    and the class names themselves. Missing values, 0 and anything that is
    not recognised yield "-".

    Args:
        prop_level: Code or name of the texture class.

    Returns:
        One of "砂质", "砂壤质", "壤质", "黏壤质", "黏质", or "-".
    """
    code_to_name = {5: "砂质", 4: "砂壤质", 3: "壤质", 1: "黏壤质", 2: "黏质"}

    if pd.isna(prop_level) or prop_level == 0:
        return "-"

    # Check names first: int() on a name such as "砂质" raises ValueError.
    if isinstance(prop_level, str):
        name = prop_level.strip()
        if name in code_to_name.values():
            return name

    try:
        code = int(prop_level)
    except (TypeError, ValueError, OverflowError):
        return "-"
    return code_to_name.get(code, "-")
|
||||
|
||||
# 判断单元格类型
|
||||
def get_merge_type(merged_range):
    """
    Classify a merged cell range by the direction(s) in which it spans.

    Args:
        merged_range: Object exposing min_row, max_row, min_col and max_col,
            or a falsy value when the cell is not merged.

    Returns:
        'both' when the range spans several rows and several columns,
        'row' when it spans several rows only (vertical merge),
        'column' when it spans several columns only (horizontal merge),
        None when there is no range or it covers a single cell.
    """
    if not merged_range:
        return None

    spans_rows = merged_range.max_row > merged_range.min_row
    spans_cols = merged_range.max_col > merged_range.min_col

    if spans_rows and spans_cols:
        return 'both'
    if spans_rows:
        return 'row'
    if spans_cols:
        return 'column'
    return None
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """
    Combine sample counts and mapped areas per (YL, TS, GRIDCODE).

    The input frames are cleaned and extended in place.

    Args:
        soil_prop_name: Name of the sample value column; its (string) values
            become the sample GRIDCODE.
        df_trlx_sample: Sample rows with the columns 'YL', 'TS' and
            ``soil_prop_name``.
        df_trlx_zhitu: Tabulated intersection with the columns 'YL_TS',
            'gridcode' and 'AREA'; 'YL_TS' is split at its first underscore.
        df_trlx: Soil-type polygons with the columns 'YL', 'TS' and
            'Shape@Area'.
        target_areas_df: Frame whose '面积' column sums to the total area the
            polygon areas are rescaled to.

    Returns:
        DataFrame sorted by YL, TS and GRIDCODE (ordered categoricals) that
        holds, among others, '样点数', '样点数占比', '制图面积' and '面积占比'.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Strip the key columns, turn null markers into NaN and drop rows lacking a key."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    group_keys = ['YL', 'TS']

    # == a. Sample points: count per (YL, TS, GRIDCODE) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample, sample_fields)
    df_samples["GRIDCODE"] = df_samples[field_name]

    # Parent rock / parent material derived from TS (not used in the result).
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)

    df_sample_means = df_samples.groupby(['YL', 'TS', 'GRIDCODE']).size().reset_index(name="样点数")
    total_sample_count = df_sample_means['样点数'].sum()
    df_sample_means['样点数占比'] = df_sample_means['样点数'] / total_sample_count

    # == b. Mapped data: area per (YL, TS, GRIDCODE) ==
    # partition() never raises on a missing underscore; such rows end up with
    # an empty TS and are dropped by clean_df below.
    df_trlx_zhitu["YL"] = df_trlx_zhitu['YL_TS'].apply(lambda x: str(x).partition('_')[0])
    df_trlx_zhitu["TS"] = df_trlx_zhitu["YL_TS"].apply(lambda x: str(x).partition('_')[2])
    df_trlx_zhitu.columns = df_trlx_zhitu.columns.str.upper()
    df_trlx_zhitu = clean_df(df_trlx_zhitu, group_keys)
    df_trlx_zhitu['GRIDCODE'] = df_trlx_zhitu['GRIDCODE'].apply(get_prop_level)

    df_map_data = df_trlx_zhitu.groupby(["YL", "TS", "GRIDCODE"]).agg({"AREA": "sum"}).reset_index()
    df_map_data['制图面积_原始'] = df_map_data['AREA'] * 0.0015  # unit: mu

    # == c. Soil-type polygon areas, rescaled to the target total ==
    df_trlx = clean_df(df_trlx, group_keys)
    df_trlx["面积"] = df_trlx["Shape@Area"] * 0.0015

    target_areas = target_areas_df['面积'].sum()
    original_area = df_trlx['面积'].sum()
    adjusted_area_yz = target_areas / original_area
    df_trlx["面积"] = df_trlx["面积"] * adjusted_area_yz
    df_trlx_area = df_trlx.groupby(group_keys)['面积'].sum().reset_index()
    df_trlx_area['面积'] = pd.to_numeric(df_trlx_area['面积'], errors='coerce').fillna(0)

    # == d. Adjustment coefficient per (YL, TS) ==
    # Both sides are keyed by (YL, TS): joining on TS alone duplicates rows
    # whenever the same TS occurs under several YL.
    ts_original_sum = df_map_data.groupby(group_keys)['制图面积_原始'].sum().reset_index()
    ts_original_sum.rename(columns={'制图面积_原始': '原始合计面积'}, inplace=True)

    ts_adj = pd.merge(ts_original_sum, df_trlx_area, on=group_keys, how='left')
    ts_adj.rename(columns={'面积': '目标合计面积'}, inplace=True)
    # No target area -> target = original, i.e. coefficient 1.
    ts_adj['目标合计面积'] = ts_adj['目标合计面积'].fillna(ts_adj['原始合计面积'])
    # replace(0, 1) avoids a division by zero.
    ts_adj['平差系数'] = ts_adj['目标合计面积'] / ts_adj['原始合计面积'].replace(0, 1)
    ts_adj['平差系数'] = ts_adj['平差系数'].fillna(1)

    # == e. Apply the coefficient to every class area ==
    df_map_data = pd.merge(df_map_data, ts_adj[group_keys + ['平差系数']], on=group_keys, how='left')
    df_map_data['平差系数'] = df_map_data['平差系数'].fillna(1)
    df_map_data['制图面积'] = df_map_data['制图面积_原始'] * df_map_data['平差系数']

    total_adjusted_area = df_map_data['制图面积'].sum()
    df_map_data['面积占比'] = df_map_data['制图面积'] / total_adjusted_area
    df_map_data = clean_df(df_map_data, group_keys)

    # == f. Merge samples and mapped data on a common skeleton ==
    print("--> 步骤3: 合并数据...")
    merge_keys = ['YL', 'TS', 'GRIDCODE']
    df_skeleton = pd.concat([
        df_sample_means[merge_keys],
        df_map_data[merge_keys],
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=merge_keys, how='left')
    df_final = pd.merge(df_final, df_map_data, on=merge_keys, how='left')

    # Configured order first, then any value that is not configured.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final["GRIDCODE"] = pd.Categorical(df_final['GRIDCODE'], categories=sorted(df_final['GRIDCODE'].unique()), ordered=True)
    df_final.sort_values(merge_keys, inplace=True)

    print("数据处理流程完成!")
    return df_final
|
||||
# return df_final, df_sample_mymz
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table3(df, output_path, prop_config:dict):
    """
    Write the per-soil-type texture statistics to a formatted Excel workbook.

    One data row is written per record of ``df``. Rows are grouped by 'YL'
    and then by 'TS'; each group label is written once and merged vertically
    over the rows of its group. A final "全区" row holds the column totals.

    Args:
        df: DataFrame with the columns 'YL', 'TS', 'GRIDCODE', '样点数',
            '样点数占比', '制图面积' and '面积占比'. GRIDCODE is written
            as is into the third column.
        output_path: Path of the .xlsx file to create.
        prop_config: Kept for interface compatibility; not read here.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_side = Side(style='thin')
    thin_border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell in the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    def cell_value(raw, factor=1, digits=None):
        """Return '-' for a missing value, else the value (optionally scaled and rounded)."""
        # pd.isna also covers None / pd.NA, on which np.isnan raises TypeError.
        if pd.isna(raw):
            return '-'
        if digits is None:
            return raw
        return round(raw * factor, digits)

    # --- b. Header ---
    ws.merge_cells('A1:B1')
    ws['A1'] = '土壤类型'
    ws.merge_cells('C1:E1')
    ws['C1'] = '样点统计'
    ws.merge_cells('F1:G1')
    ws['F1'] = '制图统计'

    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '质地类型'
    ws['D2'] = '数量/个'
    ws['E2'] = '占比%'
    ws['F2'] = '面积/亩'
    ws['G2'] = '占比%'

    # --- c. Data rows ---
    current_row = 3
    df_to_write = df.copy()

    for yl, group_yl_df in df_to_write.groupby('YL', sort=False, observed=True):
        if group_yl_df.empty:
            continue

        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row

        for ts, group_ts_df in group_yl_df.groupby('TS', sort=False, observed=False):
            # observed=False yields every TS category, most of them empty here.
            if group_ts_df.empty:
                continue

            print(f"正在写入二级地类: {ts}...")
            ts_start_row = current_row

            for _, row_data in group_ts_df.iterrows():
                ws.cell(row=current_row, column=3).value = row_data['GRIDCODE']
                # Sample-point statistics
                ws.cell(row=current_row, column=4).value = cell_value(row_data['样点数'])
                ws.cell(row=current_row, column=5).value = cell_value(row_data['样点数占比'], 100, 2)
                # Mapping statistics
                ws.cell(row=current_row, column=6).value = cell_value(row_data['制图面积'], 1, 0)
                ws.cell(row=current_row, column=7).value = cell_value(row_data['面积占比'], 100, 2)
                current_row += 1

            # Merge the TS label only when the group spans several rows.
            if current_row - ts_start_row > 1:
                ws.merge_cells(start_row=ts_start_row, start_column=2, end_row=current_row - 1, end_column=2)
            ws.cell(row=ts_start_row, column=2).value = ts

        # Nothing to label if no TS group produced a row.
        if current_row == yl_start_row:
            continue
        if current_row - yl_start_row > 1:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row - 1, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

    # --- Totals row ---
    total_areas = df_to_write['制图面积'].sum()

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=4).value = df_to_write['样点数'].sum()
    ws.cell(row=current_row, column=5).value = round(df_to_write['样点数占比'].sum() * 100, 2)
    ws.cell(row=current_row, column=6).value = round(total_areas, 0)
    ws.cell(row=current_row, column=7).value = round(df_to_write['面积占比'].sum() * 100, 2)

    # --- d. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")

    cjk_char = re.compile('([\u4e00-\u9fa5])')
    dims = {}
    for row in ws.rows:
        for cell in row:
            if not cell.value:
                continue
            merged_range = next((rng for rng in ws.merged_cells.ranges if cell.coordinate in rng), None)
            # A horizontally merged label must not widen its first column.
            if get_merge_type(merged_range) == 'column':
                continue
            text = str(cell.value)
            # Each CJK character is counted as 1.7 characters of width.
            cell_len = 0.7 * len(cjk_char.findall(text)) + len(text)
            dims[cell.column] = max(dims.get(cell.column, 0), cell_len)

    for col, value in dims.items():
        ws.column_dimensions[get_column_letter(int(col))].width = value + 5

    # --- e. Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
# 母岩母质表
|
||||
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config):
    """
    Write the parent-material / parent-rock sample statistics to an Excel workbook.

    ``df`` is aggregated by ('母质', '母岩'): the 'TZ' names are joined with
    commas, 'mean' is averaged and 'count' is summed. One row is written per
    ('母质', '母岩') pair; the '母质' label is written once per group and merged
    over the group's rows. Groups whose '母质' equals '未知' are skipped.

    Args:
        df: DataFrame with the columns '母质', '母岩', 'TZ', 'mean' and 'count'.
        output_path: Path of the .xlsx file to create.
        prop_config: Configuration dict; the second line of its '分级标准'
            string is used as the unit shown in the header (empty when the
            entry is missing or has a single line).

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # Unit = second line of '分级标准'; tolerate a missing or single-line value.
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if prop_unit_str else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    # --- a. Styles ---
    header_font = Font(name='等线', size=11, bold=True)
    cell_font = Font(name='等线', size=11)
    center_align = Alignment(horizontal='center', vertical='center', wrap_text=True)
    thin_side = Side(style='thin')
    thin_border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    def apply_style(cell_range, font, alignment=None, border=None):
        """Apply font and, when given, alignment/border to every cell in the range."""
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                if alignment:
                    cell.alignment = alignment
                if border:
                    cell.border = border

    # --- b. Header (two rows) ---
    ws.append(['母岩母质', '', '土种类型', '样点统计', ''])
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])
    ws.merge_cells('A1:B2')
    ws.merge_cells('C1:C2')
    ws.merge_cells('D1:E1')

    current_row = 3

    # NOTE(review): 'mean' is an unweighted mean of the incoming per-row means;
    # confirm whether it should be weighted by 'count'.
    grouped = df.groupby(['母质', '母岩']).agg({
        # dropna/astype(str) keeps the join from failing on non-string entries.
        'TZ': lambda names: ','.join(names.dropna().astype(str)),
        'mean': 'mean',
        'count': 'sum',
    }).reset_index()

    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue

        first_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]

        # The label goes on the first row of the group only.
        ws.cell(row=first_row, column=1, value=parent_material)

        for _, row_data in material_group.iterrows():
            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])
            ws.cell(row=current_row, column=4, value=round(row_data['mean'], 1))
            ws.cell(row=current_row, column=5, value=row_data['count'])
            current_row += 1

        # Merge the label only when the group spans several rows.
        if current_row - first_row > 1:
            ws.merge_cells(start_row=first_row, start_column=1, end_row=current_row - 1, end_column=1)

    # Closing row: only the label is written, no totals are computed here.
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')

    # --- c. Styles and column widths ---
    max_col_letter = get_column_letter(ws.max_column)
    apply_style(f'A1:{max_col_letter}{current_row}', cell_font, center_align, thin_border)
    apply_style(f'A1:{max_col_letter}2', header_font)

    for column_letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[column_letter].width = width

    # --- d. Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, trlx_features, reclassed_feature, output_path,target_areas_df, prop_config):
    """
    Build the "soil property by soil type" Excel report for one property.

    Steps: spatially join the sample points (feature class ``soil_prop_name``
    inside ``gdb_path``) onto the soil-type polygons, tabulate the
    intersection of the soil-type polygons with the reclassified layer, load
    the results into DataFrames, then call ``process_data_for_table3`` and
    ``write_to_excel_table3``.

    Any exception is printed together with its traceback and not re-raised.
    The temporary in_memory datasets are always handed to
    ``temp_files_processor.clean_up_temp_files``.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace.
        soil_prop_name: Name of the sample feature class and of its value field.
        trlx_features: Soil-type polygons with the fields YL, TS, TZ and YL_TS.
        reclassed_feature: Reclassified layer carrying a ``gridcode`` field.
        output_path: Directory that receives the Excel report.
        target_areas_df: Passed through to ``process_data_for_table3``.
        prop_config: Passed through to ``write_to_excel_table3``.
    """
    temp_files = []
    try:
        # --- 1. Output paths and inputs ---
        output_excel_path = os.path.join(output_path, f"{soil_prop_name}不同土壤类型土壤.xlsx")
        soil_prop_features = os.path.join(gdb_path, soil_prop_name)

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.extend([temp_out_features, out_table_mean])

        # --- 2. Spatial join: sample points -> soil-type polygons ---
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }

        field_mappings = arcpy.FieldMappings()
        for source_dataset, wanted_fields in fields_to_keep.items():
            for wanted_field in wanted_fields:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(source_dataset, wanted_field)
                    field_map.mergeRule = "First"
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # A field that cannot be mapped is reported and skipped.
                    print(f"警告: 添加字段 '{wanted_field}' (来自 '{source_dataset}') 时出错,将跳过。错误信息: {e}")

        arcpy.analysis.SpatialJoin(
            soil_prop_features, trlx_features, temp_out_features,
            "JOIN_ONE_TO_ONE", "KEEP_ALL", field_mappings, "INTERSECT",
        )

        # --- 3. Area of each gridcode class inside each YL_TS zone ---
        arcpy.analysis.TabulateIntersection(
            trlx_features, "YL_TS", reclassed_feature, out_table_mean,
            "gridcode", out_units="SQUARE_METERS",
        )

        trlx_zhitu_df = pd.DataFrame(
            arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "gridcode", "AREA"])
        )
        trlx_sample_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name])
        )
        # Polygon areas of the soil-type layer
        trlx_area_df = pd.DataFrame(
            arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"])
        )

        # --- 4. Aggregate and write the report ---
        final_dataframe = process_data_for_table3(
            soil_prop_name, trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df
        )
        write_to_excel_table3(final_dataframe, output_excel_path, prop_config)

    except Exception as e:
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
512
tools/core/soil_prop_stats/B3不同土壤类型土壤属性.py
Normal file
512
tools/core/soil_prop_stats/B3不同土壤类型土壤属性.py
Normal file
@@ -0,0 +1,512 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import arcpy
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from tools.config.pandas_field_cal_func import calculate_muyan, calculate_muzhi
|
||||
from tools.config.custom_sort import yl_order, ts_order
|
||||
from tools.core.utils.os_utils import temp_files_processor
|
||||
from tools.core.utils.excel_utils import ExcelStyleUtils
|
||||
|
||||
|
||||
# --- 3. 数据处理与分析 均值---
|
||||
def process_data_for_table3(soil_prop_name, df_trlx_sample, df_trlx_zhitu, df_trlx, target_areas_df):
    """
    Combine sample statistics, mapped statistics and areas per (YL, TS).

    The input frames are cleaned and extended in place.

    Args:
        soil_prop_name: Name of the sample value column (converted to float).
            It also selects which rows of ``target_areas_df`` make up the
            target total: 'GZCHD' uses the first '耕地' row, the properties
            listed in ``filtered_props`` use the '耕地' and '园地' rows, all
            others use every row.
        df_trlx_sample: Sample rows with the columns 'YL', 'TS', 'TZ' and
            ``soil_prop_name``.
        df_trlx_zhitu: Mapped statistics with a 'YL_TS' column, split at its
            first underscore; 'MEAN'/'COUNT' columns are renamed to
            '制图均值'/'制图样点数'.
        df_trlx: Soil-type polygons with the columns 'YL', 'TS' and
            'Shape@Area'.
        target_areas_df: Frame with the columns 'EJDL' and '面积'.

    Returns:
        Tuple ``(df_final, df_sample_mymz)``: the merged per-(YL, TS) frame
        sorted by YL and TS, and the sample count/mean/median per
        ('母质', '母岩', 'TZ').

    Raises:
        ValueError: If ``soil_prop_name`` is 'GZCHD' and ``target_areas_df``
            has no '耕地' row.
    """
    print("开始处理数据...")

    def clean_df(df, columns) -> pd.DataFrame:
        """Strip the key columns, turn null markers into NaN and drop rows lacking a key."""
        for col in columns:
            df[col] = df[col].astype(str).str.strip()
        df.replace(['<Null>', 'None', '', '<空>'], np.nan, inplace=True)
        df.dropna(subset=columns, inplace=True)
        return df

    group_keys = ['YL', 'TS']

    # == a. Sample points: statistics per (YL, TS) ==
    print("--> 步骤1: 计算样点均值...")
    field_name = soil_prop_name
    sample_fields = ['YL', 'TS', field_name]
    df_samples = clean_df(df_trlx_sample, sample_fields)
    df_samples[field_name] = df_samples[field_name].astype(float)

    # Parent rock / parent material derived from TS.
    df_samples['母岩'] = df_samples['TS'].apply(calculate_muyan)
    df_samples['母质'] = df_samples['母岩'].apply(calculate_muzhi)

    df_sample_means = df_samples.groupby(group_keys)[field_name].agg(['count', 'max', 'min', 'mean', 'median']).reset_index()
    df_sample_mymz = df_samples.groupby(['母质', '母岩', 'TZ'])[field_name].agg(['count', 'mean', 'median']).reset_index()

    # == b. Mapped statistics per (YL, TS) ==
    # partition() never raises on a missing underscore; such rows end up with
    # an empty TS and are dropped by clean_df below.
    df_trlx_zhitu["YL"] = df_trlx_zhitu['YL_TS'].apply(lambda x: str(x).partition('_')[0])
    df_trlx_zhitu["TS"] = df_trlx_zhitu["YL_TS"].apply(lambda x: str(x).partition('_')[2])
    df_trlx_zhitu = clean_df(df_trlx_zhitu, group_keys)
    df_trlx_zhitu.rename(columns={'MEAN': '制图均值', 'COUNT': '制图样点数'}, inplace=True)

    # == c. Soil-type polygon areas, rescaled to the target total ==
    df_trlx = clean_df(df_trlx, group_keys)
    df_trlx["面积_亩"] = df_trlx["Shape@Area"] * 0.0015

    filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']

    print(target_areas_df)
    if soil_prop_name == "GZCHD":
        cropland_area = target_areas_df.loc[target_areas_df['EJDL'] == '耕地', '面积']
        if cropland_area.empty:
            # Explicit error instead of an opaque IndexError from .values[0].
            raise ValueError("目标面积表中缺少 EJDL 为 '耕地' 的记录,无法进行面积平差。")
        target_areas = cropland_area.values[0]
    elif soil_prop_name in filtered_props:
        target_areas = target_areas_df[target_areas_df['EJDL'].isin(['耕地', '园地'])]['面积'].sum()
    else:
        target_areas = target_areas_df['面积'].sum()

    original_area = df_trlx['面积_亩'].sum()
    adjusted_area_yz = target_areas / original_area

    df_trlx["面积_亩"] = df_trlx["面积_亩"] * adjusted_area_yz
    df_trlx_area = df_trlx.groupby(group_keys)['面积_亩'].sum().reset_index()

    # == d. Merge everything on a common (YL, TS) skeleton ==
    print("--> 步骤3: 合并数据...")
    df_skeleton = pd.concat([
        df_sample_means[group_keys],
        df_trlx_zhitu[group_keys],
    ]).drop_duplicates().reset_index(drop=True)

    df_final = pd.merge(df_skeleton, df_sample_means, on=group_keys, how='left')
    df_final = pd.merge(df_final, df_trlx_zhitu, on=group_keys, how='left')
    df_final = pd.merge(df_final, df_trlx_area, on=group_keys, how='left')

    # Configured order first, then any value that is not configured.
    in_yl_order = yl_order + [x for x in df_final['YL'].unique() if x not in yl_order]
    in_ts_order = ts_order + [x for x in df_final['TS'].unique() if x not in ts_order]
    df_final["YL"] = pd.Categorical(df_final['YL'], categories=in_yl_order, ordered=True)
    df_final["TS"] = pd.Categorical(df_final['TS'], categories=in_ts_order, ordered=True)
    df_final.sort_values(group_keys, inplace=True)

    print("数据处理流程完成!")
    return df_final, df_sample_mymz
|
||||
|
||||
# 写入EXCEL 表2
|
||||
def write_to_excel_table3(df, output_path, prop_config:dict, stats):
    """
    Write the per-soil-type statistics table to a formatted Excel workbook.

    Layout: a two-row header, one row per (YL, TS) pair grouped by YL, a
    '合计' subtotal row after each YL group, and a final '全区' row.

    Args:
        df: DataFrame with the columns 'YL', 'TS', 'mean', 'median', 'min',
            'max', 'count', '制图均值', '制图样点数' and '面积_亩'.
        output_path: Destination path of the .xlsx file.
        prop_config: Mapping read for two keys. The first line of '项目分级'
            selects the number of decimals; the second line of '分级标准' is
            used as the unit text in the header.
        stats: Mapping with 'mean', 'median' and 'count', used for the
            sample columns of the '全区' row.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    print(f"开始生成 Excel 报告到 '{output_path}'...")
    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "不同土壤类型属性变化统计"

    # --- Number format: decimals depend on the property name ---
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量']
    prop_name_str = prop_config.get('项目分级','')
    if isinstance(prop_name_str, str) and prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'

    # --- Unit: second line of '分级标准'; empty when that line is absent ---
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if isinstance(prop_unit_str, str) else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt(value, spec=prop_name):
        """Format a number with ``spec``; missing/NaN/text values become '-'."""
        if value is None or isinstance(value, str) or pd.isna(value):
            return "-"
        return f"{value:.{spec}}"

    # --- Header ---
    ws.merge_cells('A1:B1'); ws['A1'] = '土壤类型'
    ws.merge_cells('C1:F1'); ws['C1'] = '样点统计'
    ws.merge_cells('G1:H1'); ws['G1'] = '制图统计'

    ws['A2'] = '亚类'
    ws['B2'] = '土属'
    ws['C2'] = '均值/' + prop_unit
    ws['D2'] = '中位值/' + prop_unit
    ws['E2'] = '范围/' + prop_unit
    ws['F2'] = '数量/个'
    ws['G2'] = '均值/' + prop_unit
    ws['H2'] = '面积/亩'

    # --- Data rows ---
    current_row = 3

    for yl, group_yl_df in df.groupby('YL', sort=False, observed=True):
        print(f"正在写入亚类: {yl}...")
        yl_start_row = current_row

        # One row per TS inside this YL group.
        for _, row_data in group_yl_df.iterrows():
            ws.cell(row=current_row, column=2).value = row_data['TS']

            # Sample statistics (columns C-F).
            sample_mean = row_data.get('mean')
            if pd.notna(sample_mean):
                ws.cell(row=current_row, column=3).value = fmt(sample_mean)
                ws.cell(row=current_row, column=4).value = fmt(row_data.get('median'))
                ws.cell(row=current_row, column=5).value = (
                    f"{fmt(row_data.get('min'))}~{fmt(row_data.get('max'))}"
                )
                ws.cell(row=current_row, column=6).value = row_data.get('count', '-')
            else:
                for col in range(3, 7):
                    ws.cell(row=current_row, column=col).value = "-"

            # Mapped statistics (columns G-H).
            map_mean = row_data.get('制图均值')
            if pd.notna(map_mean):
                ws.cell(row=current_row, column=7).value = fmt(map_mean)
                ws.cell(row=current_row, column=8).value = fmt(row_data.get('面积_亩'), '0f')
            else:
                ws.cell(row=current_row, column=7).value = "-"
                ws.cell(row=current_row, column=8).value = "-"

            current_row += 1

        # Groups whose last TS is one of these names get no subtotal row;
        # the YL name is written across A:B of the group's first row instead.
        if ws.cell(row=current_row-1, column=2).value in ["林地", "草地", "其他"]:
            ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=yl_start_row, end_column=2)
            ws.cell(row=yl_start_row, column=1).value = yl
            continue

        ws.cell(row=current_row, column=2).value = '合计'

        # Subtotal of the sample columns: count-weighted mean of the row means.
        total_count = group_yl_df['count'].sum()
        sample_weighted_sum = (group_yl_df['mean'] * group_yl_df['count']).sum()
        if total_count != 0:
            total_sample_mean = sample_weighted_sum / total_count
        else:
            total_sample_mean = None
        # NOTE(review): the subtotal median is the mean of the row medians,
        # not a true median of the pooled samples — confirm this is intended.
        total_median = group_yl_df['median'].mean()
        min_min, max_max = group_yl_df['min'].min(), group_yl_df['max'].max()

        if pd.notna(total_sample_mean):
            ws.cell(row=current_row, column=3).value = fmt(total_sample_mean)
            ws.cell(row=current_row, column=4).value = fmt(total_median)
            ws.cell(row=current_row, column=5).value = f"{fmt(min_min)}~{fmt(max_max)}"
            ws.cell(row=current_row, column=6).value = f"{total_count:.0f}"
        else:
            for col in range(3, 7):
                ws.cell(row=current_row, column=col).value = "-"

        # Subtotal of the mapped mean: weighted by '制图样点数'.
        map_weighted_sum = 0
        map_count_total = 0
        for _, row in group_yl_df.iterrows():
            mean_val = row.get('制图均值')
            count_val = row.get('制图样点数')
            # Only rows with both a mean and a positive count contribute.
            if pd.notna(mean_val) and pd.notna(count_val) and count_val > 0:
                map_weighted_sum += mean_val * count_val
                map_count_total += count_val

        total_area = group_yl_df['面积_亩'].sum()

        # Decide on "has data" by the weight total, not by the sign of the
        # result, so a legitimate zero or negative mean is still reported.
        if map_count_total > 0:
            ws.cell(row=current_row, column=7).value = fmt(map_weighted_sum / map_count_total)
            ws.cell(row=current_row, column=8).value = fmt(total_area, '0f')
        else:
            ws.cell(row=current_row, column=7).value = "-"
            ws.cell(row=current_row, column=8).value = "-"

        # Merge column A over the group's rows plus its subtotal row.
        ws.merge_cells(start_row=yl_start_row, start_column=1, end_row=current_row, end_column=1)
        ws.cell(row=yl_start_row, column=1).value = yl

        current_row += 1

    # --- '全区' row ---
    total_range = f"{fmt(df['min'].min())}~{fmt(df['max'].max())}"
    total_areas = df['面积_亩'].sum()

    # Area-weighted mapped mean. Numerator and denominator must cover the
    # same rows: rows without a mapped mean contribute no area weight.
    has_map_mean = df['制图均值'].notna() & df['面积_亩'].notna()
    weighted_area = df.loc[has_map_mean, '面积_亩'].sum()
    if weighted_area > 0:
        map_area_sum = (df.loc[has_map_mean, '制图均值'] * df.loc[has_map_mean, '面积_亩']).sum()
        total_zhitu_mean = map_area_sum / weighted_area
    else:
        total_zhitu_mean = None

    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=2)
    ws.cell(row=current_row, column=1).value = '全区'
    ws.cell(row=current_row, column=3).value = fmt(stats['mean'])
    ws.cell(row=current_row, column=4).value = fmt(stats['median'])
    ws.cell(row=current_row, column=5).value = total_range
    ws.cell(row=current_row, column=6).value = fmt(stats['count'], '0f')
    ws.cell(row=current_row, column=7).value = fmt(total_zhitu_mean)
    ws.cell(row=current_row, column=8).value = fmt(total_areas, '0f')

    # --- Styles: whole table first, then the two header rows in bold ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)

    print("正在自动调整列宽...")
    ExcelStyleUtils.auto_adjust_column_width(ws)

    # --- Save ---
    wb.save(output_path)
    print("Excel 报告生成成功!")
|
||||
|
||||
# 母岩母质表
|
||||
def write_to_excel_table4(df:pd.DataFrame, output_path, prop_config, stats):
    """
    Write the parent-material / parent-rock statistics table to Excel.

    Rows of ``df`` are grouped by ('母质', '母岩'). Each group becomes one
    sheet row holding the joined 'TZ' names, the mean of the group's 'mean'
    values and the sum of its 'count' values. Groups whose '母质' is '未知'
    are skipped. A final '全区' row is filled from ``stats``.

    Args:
        df: DataFrame with the columns '母质', '母岩', 'TZ', 'mean', 'count'.
        output_path: Destination path of the .xlsx file.
        prop_config: Mapping read for two keys. The first line of '项目分级'
            selects the number of decimals; the second line of '分级标准' is
            used as the unit text in the header.
        stats: Mapping with 'mean' and 'count', used for the '全区' row.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    if df.empty:
        print("警告: 没有数据可以写入 Excel。")
        return

    wb = Workbook()
    ws = wb.create_sheet("Mysheet", 0)
    ws.title = "母岩母质土壤属性统计"

    # --- Number format: decimals depend on the property name ---
    special_prop = ['耕作层厚度','阳离子','有机质','pH','有效磷','速效钾','交换性钙','交换性镁','有效硫','有效铁','有效锰','有效硅','全钾']
    fsn_props = ['砂粒含量','粉粒含量','黏粒含量','有效土层厚度']
    prop_name_str = prop_config.get('项目分级','')
    if isinstance(prop_name_str, str) and prop_name_str:
        split_name = prop_name_str.split('\n')[0].strip()
        if split_name in special_prop:
            prop_name = '1f'
        elif split_name in fsn_props:
            prop_name = '0f'
        else:
            prop_name = '2f'
    else:
        prop_name = '1f'
    # Same precision as the '全区' row, expressed as a digit count for round().
    decimals = int(prop_name[0])

    # --- Unit: second line of '分级标准'; empty when that line is absent ---
    prop_unit_str = prop_config.get('分级标准', '')
    unit_lines = prop_unit_str.split('\n') if isinstance(prop_unit_str, str) else []
    prop_unit = unit_lines[1].strip() if len(unit_lines) > 1 else ''

    def fmt(value, spec=prop_name):
        """Format a number with ``spec``; missing/NaN/text values become '-'."""
        if value is None or isinstance(value, str) or pd.isna(value):
            return "-"
        return f"{value:.{spec}}"

    # --- Header (two rows, merged below) ---
    headers = ['母岩母质','', '土种类型', '样点统计', '']
    ws.append(headers)
    ws.append(['', '', '', f'均值/{prop_unit}', '数量/个'])

    ws.merge_cells('A1:B2')
    ws.merge_cells('C1:C2')
    ws.merge_cells('D1:E1')

    current_row = 3

    # --- Aggregate per ('母质', '母岩') ---
    # NOTE(review): the group mean is an unweighted mean of the input 'mean'
    # values while 'count' is summed — confirm a count-weighted mean is not
    # what the report needs.
    grouped = df.groupby(['母质', '母岩']).agg({
        # Skip missing names and coerce to str so the join cannot fail.
        'TZ': lambda names: ','.join(str(name) for name in names if pd.notna(name)),
        'mean': 'mean',
        'count': 'sum'
    }).reset_index()

    for parent_material in grouped['母质'].unique():
        if parent_material == '未知':
            continue

        parent_material_row = current_row
        material_group = grouped[grouped['母质'] == parent_material]

        for _, row_data in material_group.iterrows():
            # The parent-material name appears only on the group's first row.
            if current_row == parent_material_row:
                ws.cell(row=current_row, column=1, value=parent_material)

            ws.cell(row=current_row, column=2, value=row_data['母岩'])
            ws.cell(row=current_row, column=3, value=row_data['TZ'])

            group_mean = row_data['mean']
            mean_value = round(group_mean, decimals) if pd.notna(group_mean) else "-"
            ws.cell(row=current_row, column=4, value=mean_value)
            ws.cell(row=current_row, column=5, value=row_data['count'])

            current_row += 1

        # Merge column A over the group; a one-row group needs no merge.
        last_row = current_row - 1
        if last_row > parent_material_row:
            ws.merge_cells(start_row=parent_material_row, start_column=1, end_row=last_row, end_column=1)

    # --- '全区' row, taken from the overall sample statistics ---
    ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=3)
    ws.cell(row=current_row, column=1, value='全区')
    ws.cell(row=current_row, column=4, value=fmt(stats['mean']))
    ws.cell(row=current_row, column=5, value=fmt(stats['count'], '0f'))

    # --- Styles: whole table first, then the two header rows in bold ---
    header_font = Font(name='等线', size=11, bold=True)
    max_col_letter = get_column_letter(ws.max_column)
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}{current_row}')
    ExcelStyleUtils.set_style(ws, f'A1:{max_col_letter}2', header_font)

    # --- Fixed column widths ---
    for column_letter, width in (("A", 20), ("B", 20), ("C", 30), ("D", 20), ("E", 20)):
        ws.column_dimensions[column_letter].width = width

    # --- Save ---
    wb.save(output_path)
    print(f"数据已成功写入到 {output_path}")
|
||||
|
||||
|
||||
def main(gdb_path, soil_prop_name, trlx_features, soil_prop_tif, output_path,target_areas_df, prop_config, dltb_features):
    """
    Build the soil-type and parent-material Excel reports for one property.

    Steps: spatially join the sample points to the soil-type polygons, run
    zonal statistics of the property raster per 'YL_TS' zone, collect the
    polygon areas, then hand the tables to ``process_data_for_table3`` and
    the two Excel writers.

    Args:
        gdb_path: Geodatabase used as the arcpy workspace; the sample feature
            class is read from ``<gdb_path>/<soil_prop_name>``.
        soil_prop_name: Property code; also the sample value field name.
        trlx_features: Soil-type polygons; fields 'YL', 'TS', 'TZ' and
            'YL_TS' are read.
        soil_prop_tif: Property raster used for the zonal mean.
        output_path: Directory that receives the two .xlsx files.
        target_areas_df: Passed through to ``process_data_for_table3``.
        prop_config: Passed through to the Excel writers.
        dltb_features: Land-use polygons; filtered by 'DLBM' for 'GZCHD' and
            for the properties in ``filtered_props``.

    Returns:
        None. Any exception is printed with its traceback and not re-raised;
        temporary datasets are cleaned up in every case.
    """
    # Bound before the try so the finally clause can always reference it.
    temp_files = []
    try:
        # --- 1. Output paths and inputs ---
        output_excel_path = os.path.join(output_path,f"{soil_prop_name}不同土壤类型土壤.xlsx")
        output_excel4_path = os.path.join(output_path,f"{soil_prop_name}不同母岩母质土壤属性.xlsx")
        soil_prop_features = os.path.join(gdb_path,soil_prop_name)

        arcpy.env.workspace = gdb_path
        arcpy.env.overwriteOutput = True

        print("开始处理数据...")
        if soil_prop_name == "GZCHD":
            temp_gdtb_trlx_out = r"in_memory/temp_gdtb_trlx_out"
            temp_gdtb_trlx = r"in_memory/temp_gdtb_trlx"
            temp_files.append(temp_gdtb_trlx)
            # The exported subset is also a temporary dataset and must be
            # registered, otherwise it is never cleaned up.
            temp_files.append(temp_gdtb_trlx_out)

        temp_out_features = r"in_memory/temp_out_type_features"
        out_table_mean = r"in_memory/out_table_type_mean"
        temp_files.append(temp_out_features)
        temp_files.append(out_table_mean)

        # --- 2. Field mappings for the spatial join ---
        fields_to_keep = {
            soil_prop_features: [soil_prop_name],
            trlx_features: ["YL", "TS", "TZ"],
        }

        field_mappings = arcpy.FieldMappings()
        for join_features, field_names in fields_to_keep.items():
            for field_name in field_names:
                try:
                    field_map = arcpy.FieldMap()
                    field_map.addInputField(join_features, field_name)
                    field_map.mergeRule = "First"
                    field_mappings.addFieldMap(field_map)
                except Exception as e:
                    # Best effort: a field that cannot be mapped is skipped.
                    print(f"警告: 添加字段 '{field_name}' (来自 '{join_features}') 时出错,将跳过。错误信息: {e}")

        # Properties whose mapped statistics use only DLBM '01%' / '02%' polygons.
        filtered_props = ['ECA', 'EMG', 'ACU', 'AZN', 'AFE', 'AMN', 'AMO', 'AB', 'AS1', 'TSE']

        # Join sample points to the soil-type polygons.
        arcpy.analysis.SpatialJoin(soil_prop_features, trlx_features, temp_out_features, "JOIN_ONE_TO_ONE", "KEEP_ALL",field_mappings, "INTERSECT")

        # --- 3. Zonal mean and polygon areas ---
        if soil_prop_name == "GZCHD":
            # Restrict the zones to DLBM '01%' polygons.
            arcpy.analysis.Intersect([trlx_features, dltb_features], temp_gdtb_trlx, 'NO_FID')
            arcpy.conversion.ExportFeatures(temp_gdtb_trlx,temp_gdtb_trlx_out,"DLBM LIKE '01%'")

            arcpy.sa.ZonalStatisticsAsTable(temp_gdtb_trlx_out, "YL_TS", soil_prop_tif, out_table_mean, "DATA", "MEAN")
            trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_gdtb_trlx_out, ["YL", "TS", "Shape@Area"]))
        elif soil_prop_name in filtered_props:
            temp_gdtb_trlx_filtered = r"in_memory/temp_gdtb_trlx_filtered"
            temp_gdtb_trlx_out_filtered = r"in_memory/temp_gdtb_trlx_out_filtered"
            temp_files.append(temp_gdtb_trlx_filtered)
            temp_files.append(temp_gdtb_trlx_out_filtered)

            # Restrict the zones to DLBM '01%' or '02%' polygons.
            arcpy.analysis.Intersect([trlx_features, dltb_features], temp_gdtb_trlx_filtered, 'NO_FID')
            arcpy.conversion.ExportFeatures(temp_gdtb_trlx_filtered, temp_gdtb_trlx_out_filtered, "DLBM LIKE '01%' OR DLBM LIKE '02%'")

            arcpy.sa.ZonalStatisticsAsTable(temp_gdtb_trlx_out_filtered, "YL_TS", soil_prop_tif, out_table_mean, "DATA", "MEAN")
            trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_gdtb_trlx_out_filtered, ["YL", "TS", "Shape@Area"]))

            print("过滤制图数据:仅统计耕地和园地(DLBM LIKE '01%' OR '02%')")
        else:
            # No land-use filter: use the soil-type polygons directly.
            arcpy.sa.ZonalStatisticsAsTable(trlx_features, "YL_TS", soil_prop_tif, out_table_mean, "DATA", "MEAN")
            trlx_area_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(trlx_features, ["YL", "TS", "Shape@Area"]))

        trlx_zhitu_df = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table_mean, ["YL_TS", "MEAN", "COUNT"]))
        trlx_sample_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(temp_out_features, ["YL", "TS", "TZ", soil_prop_name]))

        # Overall sample statistics, used for the '全区' rows of both tables.
        sample_values = trlx_sample_df[soil_prop_name]
        stat_sample = {
            'min': sample_values.min(),
            'max': sample_values.max(),
            'mean': sample_values.mean(),
            'median': sample_values.median(),
            'count': sample_values.count()
        }

        # --- 4. Build the tables and write both workbooks ---
        final_dataframe, df_mymz = process_data_for_table3(soil_prop_name,trlx_sample_df, trlx_zhitu_df, trlx_area_df, target_areas_df)

        write_to_excel_table3(final_dataframe, output_excel_path, prop_config, stat_sample)
        write_to_excel_table4(df_mymz, output_excel4_path, prop_config,stat_sample)

    except Exception as e:
        # Top-level boundary: report the failure without re-raising.
        print(f"\n处理过程中发生严重错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        temp_files_processor.clean_up_temp_files(temp_files)
        import gc
        gc.collect()
|
||||
|
||||
# --- 4. 主程序入口 ---
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
1269
tools/core/soil_prop_stats/E1土壤属性历史变化.py
Normal file
1269
tools/core/soil_prop_stats/E1土壤属性历史变化.py
Normal file
File diff suppressed because it is too large
Load Diff
0
tools/core/soil_prop_stats/__init__.py
Normal file
0
tools/core/soil_prop_stats/__init__.py
Normal file
Reference in New Issue
Block a user