refactor: 重构项目结构,将geo_tools重命名为app并更新相关引用

- 将主包名从geo_tools改为app
- 更新所有模块中的引用路径
- 迁移并更新测试用例
- 添加项目规则文档
- 保持原有功能不变,仅进行结构调整
This commit is contained in:
2026-04-12 19:49:56 +08:00
parent fcb8e1f255
commit db51d41aef
41 changed files with 4132 additions and 808 deletions

View File

@@ -17,8 +17,8 @@ from pathlib import Path
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
import geo_tools
from geo_tools.utils.logger import get_logger
import app
from app.utils.logger import get_logger
logger = get_logger("example_workflow")
@@ -29,37 +29,37 @@ OUTPUT_DIR.mkdir(exist_ok=True)
def main() -> None:
logger.info("=" * 60)
logger.info("geo_tools 端到端工作流示例 v%s", geo_tools.__version__)
logger.info("geo_tools 端到端工作流示例 v%s", app.__version__)
logger.info("=" * 60)
# ── 1. 读取示例点数据 ──────────────────────────────────────────
logger.info("\n[步骤 1] 读取示例点数据GeoJSON")
points = geo_tools.read_vector(DATA_DIR / "sample_points.geojson")
points = app.read_vector(DATA_DIR / "sample_points.geojson")
logger.info(" 读取完成:%d 条要素CRS=%s", len(points), points.crs)
logger.info(" 字段:%s", list(points.columns))
# ── 2. 读取示例面数据 ──────────────────────────────────────────
logger.info("\n[步骤 2] 读取示例区域多边形GeoJSON")
regions = geo_tools.read_vector(DATA_DIR / "sample_regions.geojson")
regions = app.read_vector(DATA_DIR / "sample_regions.geojson")
logger.info(" 区域列表:%s", regions["name"].tolist())
# ── 3. 数据校验 ───────────────────────────────────────────────
logger.info("\n[步骤 3] 几何有效性校验")
stats = geo_tools.validate_geometry(points)
stats = app.validate_geometry(points)
logger.info(" 点数据校验结果:%s", stats)
stats = geo_tools.validate_geometry(regions)
stats = app.validate_geometry(regions)
logger.info(" 面数据校验结果:%s", stats)
# ── 4. 坐标系信息 ─────────────────────────────────────────────
logger.info("\n[步骤 4] 查询 CRS 信息")
crs_info = geo_tools.get_crs_info("EPSG:4326")
crs_info = app.get_crs_info("EPSG:4326")
logger.info(" WGS84 信息:%s", crs_info)
proj_crs = geo_tools.suggest_projected_crs(116.4, 39.9)
proj_crs = app.suggest_projected_crs(116.4, 39.9)
logger.info(" 北京适合的投影 CRS%s", proj_crs)
# ── 5. 重投影 ─────────────────────────────────────────────────
logger.info("\n[步骤 5] 重投影到 Web Mercator用于可视化")
points_3857 = geo_tools.reproject(points, "EPSG:3857")
points_3857 = app.reproject(points, "EPSG:3857")
logger.info(" 重投影完成CRS=%s", points_3857.crs)
# ── 6. 面积加权均值 ───────────────────────────────────────────
@@ -68,31 +68,31 @@ def main() -> None:
points_buffered = points.to_crs("EPSG:3857").copy()
points_buffered["geometry"] = points_buffered.geometry.buffer(100_000) # 100km缓冲
points_buffered = points_buffered.to_crs("EPSG:4326")
from geo_tools.analysis.stats import area_weighted_mean
from app.analysis.stats import area_weighted_mean
aw_result = area_weighted_mean(points_buffered, value_col="value")
logger.info(" 全局面积加权均值:%.4f", aw_result["area_weighted_mean"])
# ── 7. 按位置选择 ─────────────────────────────────────────────
logger.info("\n[步骤 7] 按位置选择:筛选华南区域内的城市")
hua_nan = regions[regions["name"] == "华南"]
points_in_huanan = geo_tools.select_by_location(points, hua_nan, predicate="intersects")
points_in_huanan = app.select_by_location(points, hua_nan, predicate="intersects")
logger.info(" 华南区域内的城市:%s", points_in_huanan["name"].tolist())
# ── 8. 统计汇总 ───────────────────────────────────────────────
logger.info("\n[步骤 8] 属性统计汇总")
from geo_tools.analysis.stats import summarize_attributes
from app.analysis.stats import summarize_attributes
summary = summarize_attributes(points, columns=["value"], group_col="category")
logger.info(" 按分类汇总:\n%s", summary.to_string(index=False))
# ── 9. 写出结果 ───────────────────────────────────────────────
logger.info("\n[步骤 9] 写出处理结果")
out_geojson = OUTPUT_DIR / "result_points_3857.geojson"
geo_tools.write_vector(points_3857, out_geojson)
app.write_vector(points_3857, out_geojson)
logger.info(" GeoJSON 写出:%s", out_geojson)
out_gpkg = OUTPUT_DIR / "results.gpkg"
geo_tools.write_gpkg(points, out_gpkg, layer="original_points")
geo_tools.write_gpkg(regions, out_gpkg, layer="regions", mode="a")
app.write_gpkg(points, out_gpkg, layer="original_points")
app.write_gpkg(regions, out_gpkg, layer="regions", mode="a")
logger.info(" GPKG 写出2 图层):%s", out_gpkg)
logger.info("\n" + "=" * 60)

View File

@@ -0,0 +1,188 @@
import os
from pathlib import Path
import time
import geopandas as gpd
from geopandas.io import file
import pandas as pd
import numpy as np
def assign_gzchd_flexible_v2(soil_prop, point_path, polygon_path, output_path):
    """Transfer a numeric point attribute onto polygons by tiered nearest-point matching.

    Every polygon receives the ``soil_prop`` value of its nearest sample
    point. Candidate points are restricted, in order, to those sharing the
    polygon's ``TZ`` value, then ``TS``, then ``YL``, then ``TL``; polygons
    still unmatched after those tiers fall back to the nearest point overall.
    The result is written to ``output_path`` and returned.

    Args:
        soil_prop: Name of the point column to transfer. Non-numeric and
            missing values are coerced to 0 before matching.
        point_path: Path of the sample-point layer.
        polygon_path: Path of the polygon layer.
        output_path: Path the resulting layer is written to.

    Returns:
        The polygon GeoDataFrame with a ``soil_prop`` column attached.

    Raises:
        ValueError: If the point layer has neither a ``TZ`` nor a ``TDLYLX``
            column.
    """
    print("正在读取数据...")
    points = gpd.read_file(point_path)
    polygons = gpd.read_file(polygon_path)

    # 1. Bring the points into the polygon layer's CRS.
    if points.crs != polygons.crs:
        print(f"坐标系不一致,正在转换点数据...")
        points = points.to_crs(polygons.crs)

    # 2. Normalise the TZ key; points may carry it under TDLYLX instead.
    if 'TZ' not in points.columns:
        if 'TDLYLX' in points.columns:
            points['TZ'] = points['TDLYLX'].astype(str).str.strip()
        else:
            raise ValueError("点要素类中不存在TZ或TDLYLX字段无法进行匹配")
    else:
        points['TZ'] = points['TZ'].astype(str).str.strip()
    # A polygon layer without TZ used to raise KeyError here; the TZ tier is
    # now simply skipped (match_by_attribute returns no match) and the later
    # tiers take over.
    if 'TZ' in polygons.columns:
        polygons['TZ'] = polygons['TZ'].astype(str).str.strip()

    # Force the transferred attribute to numeric so concatenation of the
    # per-tier results does not hit dtype conflicts.
    points[soil_prop] = pd.to_numeric(points[soil_prop], errors='coerce').fillna(0)
    if soil_prop in polygons.columns:
        polygons = polygons.drop(columns=[soil_prop])

    def match_by_attribute(poly_gdf, pt_gdf, attr_name, suffix):
        """Nearest-point join restricted to features sharing ``attr_name``.

        Returns ``(matched_gdf, matched_index_list)``, or ``(None, [])`` when
        the attribute is missing on either side or no value is shared.
        """
        if attr_name not in poly_gdf.columns or attr_name not in pt_gdf.columns:
            return None, []
        poly_sub = poly_gdf[poly_gdf[attr_name].notna()].copy()
        point_sub = pt_gdf[pt_gdf[attr_name].notna()].copy()
        if poly_sub.empty or point_sub.empty:
            return None, []
        poly_sub[attr_name] = poly_sub[attr_name].astype(str).str.strip()
        point_sub[attr_name] = point_sub[attr_name].astype(str).str.strip()
        common_values = set(poly_sub[attr_name].unique()) & set(point_sub[attr_name].unique())
        if not common_values:
            return None, []
        matched_parts = []
        matched_ids = []
        # Sorted so the row order of the output does not depend on set
        # iteration order, which varies between interpreter runs.
        for value in sorted(common_values):
            poly_part = poly_sub[poly_sub[attr_name] == value].copy()
            point_part = point_sub[point_sub[attr_name] == value][[soil_prop, 'geometry']].copy()
            if poly_part.empty or point_part.empty:
                continue
            matched_part = gpd.sjoin_nearest(poly_part, point_part, how='left', rsuffix=suffix)
            # Equidistant points yield several rows per polygon; keep one.
            matched_part = matched_part[~matched_part.index.duplicated(keep='first')]
            if not matched_part.empty:
                matched_parts.append(matched_part)
                matched_ids.extend(matched_part.index.tolist())
        if matched_parts:
            return pd.concat(matched_parts), matched_ids
        return None, []

    matched_results = []
    matched_indices = []

    # --- Step 1: nearest point with the same TZ ---
    print("步骤 1: 正在匹配相同 TZ 的最近点...")
    first_matched, first_ids = match_by_attribute(polygons, points, 'TZ', '_p1')
    if first_matched is not None and not first_matched.empty:
        matched_results.append(first_matched)
        matched_indices.extend(first_ids)

    # --- Steps 2-4: fall back through TS, YL and TL for unmatched polygons ---
    remaining_polygons = polygons[~polygons.index.isin(matched_indices)].copy()
    print(f"步骤 2: 正在为 {len(remaining_polygons)} 个要素匹配 TS 最近点...")
    if remaining_polygons.empty:
        print("没有未匹配的面要素,跳过 TS/YL/TL 匹配。")
    else:
        fallback_steps = (
            (2, 'TS', '_p_ts', "未匹配到 TS 类型要素,继续下一步。"),
            (3, 'YL', '_p_yl', "未匹配到 YL 类型要素,继续下一步。"),
            (4, 'TL', '_p_tl', "未匹配到 TL 类型要素,继续全局最近点。"),
        )
        for step_no, attr, suffix, miss_message in fallback_steps:
            if remaining_polygons.empty:
                break  # nothing left for the remaining tiers
            if step_no != 2:  # the step-2 banner was already printed above
                print(f"步骤 {step_no}: 正在为 {len(remaining_polygons)} 个要素匹配 {attr} 最近点...")
            step_matched, step_ids = match_by_attribute(remaining_polygons, points, attr, suffix)
            if step_matched is not None and not step_matched.empty:
                matched_results.append(step_matched)
                matched_indices.extend(step_ids)
                remaining_polygons = polygons[~polygons.index.isin(matched_indices)].copy()
                print(f"已匹配 {attr}: {len(step_ids)} 个要素,剩余 {len(remaining_polygons)} 个。")
            else:
                print(miss_message)

    # --- Final step: global nearest point for whatever is still unmatched ---
    remaining_polygons = polygons[~polygons.index.isin(matched_indices)].copy()
    print(f"最后一步: 正在为 {len(remaining_polygons)} 个要素匹配全局最近点...")
    if not remaining_polygons.empty:
        point_pool = points[[soil_prop, 'geometry']]
        step_final = gpd.sjoin_nearest(remaining_polygons, point_pool, how='left', rsuffix='_p2')
        step_final = step_final[~step_final.index.duplicated(keep='first')]
        matched_results.append(step_final)

    # --- Merge the per-tier results ---
    print("正在合并数据...")
    # Skip None / empty frames so pd.concat does not emit a FutureWarning.
    to_concat = [res for res in matched_results if res is not None and not res.empty]
    if to_concat:
        final_gdf = pd.concat(to_concat)
    else:
        # Nothing matched at all: return the polygons with a zero attribute.
        final_gdf = polygons.copy()
        final_gdf[soil_prop] = 0

    # --- Clean up and save ---
    # Drop the helper columns produced by the joins.
    # NOTE(review): this is a substring test, so a genuine polygon attribute
    # whose name contains one of these markers would be dropped as well.
    temp_markers = ('index_', '_p1', '_p2', '_p_ts', '_p_yl', '_p_tl')
    cols_to_drop = [
        c for c in final_gdf.columns
        if any(marker in c for marker in temp_markers)
    ]
    final_gdf = final_gdf.drop(columns=cols_to_drop)
    # Keep only the first occurrence of any duplicated column name.
    final_gdf = final_gdf.loc[:, ~final_gdf.columns.duplicated()]
    # Fill gaps so the attribute column is complete.
    if soil_prop in final_gdf.columns:
        final_gdf[soil_prop] = final_gdf[soil_prop].fillna(0)
    else:
        final_gdf[soil_prop] = 0
    print(f"正在保存结果至: {output_path}")
    final_gdf.to_file(output_path, encoding='utf-8')
    print("处理完成!")
    return final_gdf
if __name__ == "__main__":
    # Example run with hard-coded local paths. Only this single point file is
    # processed; no folder is iterated.
    shp_file = r"E:\@三普属性图出图\测试\YXTCHD.shp" # sample-point Shapefile (a file, not a folder)
    dltb_file = r"E:\@三普属性图出图\广西天峨县\@基础数据\土壤类型图\土壤类型图.shp" # polygon layer; originally annotated as cultivated-land patches
    output_folder = r"E:\@三普属性图出图\广西天峨县" # output folder
    assign_gzchd_flexible_v2(
        soil_prop= "YXTCHD", # name of the point attribute to transfer
        point_path=shp_file, # sample-point data
        polygon_path= dltb_file,
        output_path=fr'{output_folder}\YXTCHD.shp' # output file
    )
    time.sleep(2) # pause intended to avoid file read/write conflicts

View File

@@ -0,0 +1,404 @@
import pandas as pd
import numpy as np
import os
import geopandas as gpd
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, Border, Side
from openpyxl.utils import get_column_letter
# Indicator code -> (display name, unit).
INDICATOR_UNITS = {
    # Basic indicators
    'PH': ('pH', '-'),
    'ECA': ('交换性钙', 'cmol(½Ca²⁺)/kg'),
    'EMG': ('交换性镁', 'cmol(½Mg²⁺)/kg'),
    'TN': ('全氮', 'g/kg'),
    'TP': ('全磷', 'g/kg'),
    'TK': ('全钾', 'g/kg'),
    'AS1': ('有效硫', 'mg/kg'),
    'AB': ('有效硼', 'mg/kg'),
    'AP': ('有效磷', 'mg/kg'),
    'AFE': ('有效铁', 'mg/kg'),
    'ACU': ('有效铜', 'mg/kg'),
    'AZN': ('有效锌', 'mg/kg'),
    'AMN': ('有效锰', 'mg/kg'),
    'OM': ('有机质', 'g/kg'),
    'GZCHD': ('耕层厚度', 'cm'),
    'AK': ('速效钾', 'mg/kg'),
    'CEC': ('阳离子交换量', 'cmol/kg'),
    # Special indicators - resolved from the file name (see FILENAME_TO_FIELD)
    'FL': ('粉粒', '%'),
    'NL': ('黏粒', '%'),
    'SL': ('砂粒', '%'),
    'TRRZPJZ': ('土壤容重', 'g/cm³'),
    'TRZD': ('土壤质地', '分类'),
    # Other possible indicators
    'AMO': ('有效钼', 'mg/kg'),
    'TSE': ('全硒', 'mg/kg'),
    'YXTCHD': ('有效土层厚度', 'cm')
}
# Shapefile base name (without extension) -> indicator code.
FILENAME_TO_FIELD = {
    '粉粒': 'FL',
    '黏粒': 'NL',
    '砂粒': 'SL',
    '表层容重': 'TRRZPJZ',
    '土壤质地十二级分类': 'TRZD',
    '双江县YXTCHD': 'YXTCHD'
}
# Indicator code -> column names accepted as that indicator in an attribute table.
FIELD_ALIASES = {
    'PH': ['pH', 'PH', 'ph'], # accept the common capitalisations of pH
    'ECA': ['交换性钙', 'ECA'],
    'EMG': ['交换性镁', 'EMG'],
    'TN': ['全氮', 'TN'],
    'TP': ['全磷', 'TP'],
    'TK': ['全钾', 'TK'],
    'AS1': ['有效硫', 'AS1'],
    'AB': ['有效硼', 'AB'],
    'AP': ['有效磷', 'AP'],
    'AFE': ['有效铁', 'AFE'],
    'ACU': ['有效铜', 'ACU'],
    'AZN': ['有效锌', 'AZN'],
    'AMN': ['有效锰', 'AMN'],
    'OM': ['有机质', 'OM'],
    'GZCHD': ['耕层厚度', 'GZCHD'],
    'AK': ['速效钾', 'AK'],
    'CEC': ['阳离子交换量', 'CEC'],
    'FL': ['粉粒', 'FL'],
    'NL': ['黏粒', 'NL'],
    'SL': ['砂粒', 'SL'],
    'TRRZPJZ': ['土壤容重', 'TRRZPJZ'],
    'TRZD': ['土壤质地', 'TRZD'],
    'AMO': ['有效钼', 'AMO'],
    'TSE': ['全硒', 'TSE'],
    'YXTCHD': ['有效土层厚度', 'YXTCHD']
}
def find_shapefiles(folder_path):
    """Recursively collect the paths of all Shapefiles under *folder_path*.

    A file counts as a Shapefile when its name ends in ``.shp``, compared
    case-insensitively. Paths come back in ``os.walk`` traversal order; a
    folder that does not exist yields an empty list.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(folder_path)
        for entry in entries
        if entry.lower().endswith('.shp')
    ]
def read_shapefile_data(shapefile_path):
    """Read a Shapefile into a GeoDataFrame, trying UTF-8 and then GBK.

    Args:
        shapefile_path: Path of the ``.shp`` file.

    Returns:
        The loaded GeoDataFrame, or ``None`` when both attempts fail. Both
        failures are printed so the cause is not lost.
    """
    try:
        print(f" 读取Shapefile: {os.path.basename(shapefile_path)}")
        gdf = gpd.read_file(shapefile_path, encoding='utf-8')
        print(f" 要素数量: {len(gdf)}")
        print(f" 属性字段: {list(gdf.columns)}")
        return gdf
    except Exception as e:
        print(f" 读取Shapefile失败: {e}")
        try:
            gdf = gpd.read_file(shapefile_path, encoding='gbk')
            print(f" 使用GBK编码成功读取")
            return gdf
        except Exception as gbk_error:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit and hid the reason for the failure.
            print(f" 使用GBK编码读取失败: {gbk_error}")
            return None
def get_indicator_data(gdf, filename):
    """Pick the indicator columns out of an attribute table.

    Three passes are tried; each later pass runs only if nothing has been
    found so far:

    1. Map the file's base name through ``FILENAME_TO_FIELD`` and look for
       that code, or one of its aliases, among the columns.
    2. Scan every indicator in ``INDICATOR_UNITS`` and take each one whose
       code or alias is a column (several indicators may be returned).
    3. Look for an indicator whose code or display name occurs in the file
       name and is also a column.

    Args:
        gdf: Table exposing ``.columns`` and column lookup by name.
        filename: Path or name of the Shapefile the table was read from.

    Returns:
        Dict mapping indicator code to the matching column of ``gdf``.
    """
    indicator_data = {}
    # Strip only a trailing ".shp", case-insensitively. find_shapefiles also
    # accepts ".SHP", and the previous str.replace missed that spelling and
    # removed ".shp" from the middle of a name.
    basename = os.path.basename(filename)
    stem, extension = os.path.splitext(basename)
    if extension.lower() == '.shp':
        basename = stem

    # 1. Filename mapping first.
    target_field = FILENAME_TO_FIELD.get(basename)
    if target_field is not None:
        if target_field in gdf.columns:
            indicator_data[target_field] = gdf[target_field]
            print(f" 通过文件名映射找到字段: {target_field}")
        elif target_field in INDICATOR_UNITS:
            # Fall back to the aliases registered for the mapped code.
            for alias in FIELD_ALIASES.get(target_field, []):
                if alias in gdf.columns:
                    indicator_data[target_field] = gdf[alias]
                    print(f" 通过文件名映射+别名找到字段: {alias} -> {target_field}")
                    break

    # 2. Otherwise match every indicator by code, then by alias.
    if not indicator_data:
        for indicator_code in INDICATOR_UNITS:
            if indicator_code in gdf.columns:
                indicator_data[indicator_code] = gdf[indicator_code]
                print(f" 直接匹配字段: {indicator_code}")
                continue
            for alias in FIELD_ALIASES.get(indicator_code, []):
                if alias in gdf.columns:
                    indicator_data[indicator_code] = gdf[alias]
                    print(f" 通过别名匹配: {alias} -> {indicator_code}")
                    break

    # 3. Last resort: keywords found in the file name.
    # NOTE(review): with the current tables every code and display name is
    # also an alias, so pass 2 already covers these cases; this pass only
    # matters if the tables diverge.
    if not indicator_data:
        filename_lower = basename.lower()
        for indicator_code, (chinese_name, unit) in INDICATOR_UNITS.items():
            if indicator_code.lower() in filename_lower or chinese_name in filename_lower:
                if indicator_code in gdf.columns:
                    indicator_data[indicator_code] = gdf[indicator_code]
                    print(f" 通过文件名关键词匹配: {indicator_code}")
                    break
                elif chinese_name in gdf.columns:
                    indicator_data[indicator_code] = gdf[chinese_name]
                    print(f" 通过文件名关键词匹配中文名: {chinese_name} -> {indicator_code}")
                    break
    return indicator_data
def get_combined_stats_from_folder(folder_path, folder_name="数据"):
    """Pool indicator values from every Shapefile in a folder and summarise them.

    Values of the same indicator are concatenated across all files, values
    below zero are discarded, and one row per indicator is produced with the
    sample count, min, max, mean, population standard deviation (ddof=0) and
    coefficient of variation in percent.

    Args:
        folder_path: Folder searched recursively for Shapefiles.
        folder_name: Label for the data set; not used by the computation.

    Returns:
        DataFrame sorted by the ``指标`` column, or an empty DataFrame when
        no Shapefile or no indicator data is found.
    """
    shp_paths = find_shapefiles(folder_path)
    if not shp_paths:
        print(f" 未找到Shapefile文件")
        return pd.DataFrame()
    print(f" 找到 {len(shp_paths)} 个Shapefile文件")

    # One growing list of numeric values per indicator code.
    pooled = {code: [] for code in INDICATOR_UNITS.keys()}
    for position, shp_path in enumerate(shp_paths, 1):
        print(f"\n [{position}] 处理文件: {os.path.basename(shp_path)}")
        layer = read_shapefile_data(shp_path)
        if layer is None:
            continue
        for code, raw_values in get_indicator_data(layer, shp_path).items():
            if code not in pooled:
                continue
            try:
                # Non-numeric entries become NaN and are dropped.
                numeric = pd.to_numeric(raw_values, errors='coerce').dropna()
                if len(numeric) > 0:
                    pooled[code].extend(numeric.tolist())
                    print(f" 提取 {code}: {len(numeric)} 个值")
            except Exception as e:
                print(f" 处理 {code} 数据时出错: {e}")

    # Summarise each indicator that collected at least one value.
    rows = []
    for code, (chinese_name, unit) in INDICATOR_UNITS.items():
        values = pooled.get(code, [])
        if not values:
            continue
        series = pd.Series(values)
        # Keep non-negative values only.
        series = series[(series >= 0) | pd.isna(series)]
        if len(series) == 0:
            continue
        std_dev = series.std(ddof=0)  # population, not sample, deviation
        mean_val = series.mean()
        # CV is reported as 0 when the mean is effectively zero.
        cv_value = 0.0 if abs(mean_val) < 1e-8 else round((std_dev / mean_val) * 100, 2)
        rows.append({
            '指标代码': code,
            '指标': chinese_name,
            '单位': unit,
            '样点数': int(len(series)),
            'Min': round(float(series.min()), 2),
            'Max': round(float(series.max()), 2),
            'Mean': round(float(mean_val), 2),
            'Std': round(float(std_dev), 2),
            'CV': cv_value
        })
        print(f" 统计 {chinese_name}({code}): {len(series)} 个样点")

    if not rows:
        print(" 未找到任何指标数据")
        return pd.DataFrame()
    stats_df = pd.DataFrame(rows).sort_values('指标')
    print(f"\n 总共统计到 {len(stats_df)} 个指标")
    return stats_df
def create_statistics_excel(before_folder, after_folder, output_path):
    """Write a single-sheet Excel summary comparing sample statistics before and after removal.

    Statistics are computed with ``get_combined_stats_from_folder`` for both
    folders. The sheet ``样点统计`` lists, per indicator found in the "after"
    folder, the sample count before removal, the number removed, the count
    after removal and the Min/Max/Mean/Std/CV of the "after" data. When the
    "after" folder is missing or yields no data, the sheet holds a single
    message cell instead.

    :param before_folder: folder with the Shapefiles before outlier removal
    :param after_folder: folder with the Shapefiles after outlier removal
    :param output_path: path of the ``.xlsx`` file to write
    """
    workbook = Workbook()
    # Remove the default sheet created by openpyxl
    if 'Sheet' in workbook.sheetnames:
        default_sheet = workbook['Sheet']
        workbook.remove(default_sheet)
    # Cell style shared by header and data cells
    thin_border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )
    print("=" * 60)
    print("开始分析样点数据")
    print("=" * 60)
    # Statistics before removal; stays None when the folder is missing or empty
    before_stats = None
    if os.path.exists(before_folder):
        print(f"\n[1] 分析剔除前数据:")
        print(f"文件夹路径: {before_folder}")
        before_stats = get_combined_stats_from_folder(before_folder, "剔除前")
        if not before_stats.empty:
            print(f"✓ 剔除前统计完成: {len(before_stats)} 个指标")
        else:
            print("✗ 剔除前未找到指定指标数据")
            before_stats = None
    else:
        print(f"✗ 剔除前文件夹不存在: {before_folder}")
        before_stats = None
    # Statistics after removal
    if os.path.exists(after_folder):
        print(f"\n[2] 分析剔除后数据:")
        print(f"文件夹路径: {after_folder}")
        after_stats = get_combined_stats_from_folder(after_folder, "剔除后")
        if not after_stats.empty:
            # Create the combined statistics sheet
            sheet_combined = workbook.create_sheet(title="样点统计")
            # Header: indicator, unit, count before, count removed, count after, Min, Max, Mean, Std, CV
            combined_headers = ['指标', '单位', '剔除前样点数', '剔除样点数', '剔除后样点数', 'Min', 'Max', 'Mean',
                                'Std', 'CV']
            for col, header in enumerate(combined_headers, 1):
                cell = sheet_combined.cell(row=1, column=col, value=header)
                cell.alignment = Alignment(horizontal='center', vertical='center')
                cell.font = Font(bold=True)
                cell.border = thin_border
            # Write one data row per indicator, starting below the header
            for row_idx, (index, after_row) in enumerate(after_stats.iterrows(), start=2):
                # Look up the matching "before" row; the count stays 0 when none exists
                before_sample_count = 0
                before_row = None
                if before_stats is not None:
                    # Match on indicator code first
                    matching_rows = before_stats[before_stats['指标代码'] == after_row['指标代码']]
                    if not matching_rows.empty:
                        before_row = matching_rows.iloc[0]
                    else:
                        # Fall back to matching on the indicator name
                        matching_rows = before_stats[before_stats['指标'] == after_row['指标']]
                        if not matching_rows.empty:
                            before_row = matching_rows.iloc[0]
                    if before_row is not None:
                        before_sample_count = int(before_row['样点数'])
                # Number of removed samples, clamped at 0
                after_sample_count = int(after_row['样点数'])
                abnormal_count = max(0, before_sample_count - after_sample_count)
                # Write the row values
                sheet_combined.cell(row=row_idx, column=1, value=after_row['指标']) # indicator
                sheet_combined.cell(row=row_idx, column=2, value=after_row['单位']) # unit
                sheet_combined.cell(row=row_idx, column=3, value=before_sample_count) # count before removal
                sheet_combined.cell(row=row_idx, column=4, value=abnormal_count) # count removed
                sheet_combined.cell(row=row_idx, column=5, value=after_sample_count) # count after removal
                sheet_combined.cell(row=row_idx, column=6, value=after_row['Min']) # Min
                sheet_combined.cell(row=row_idx, column=7, value=after_row['Max']) # Max
                sheet_combined.cell(row=row_idx, column=8, value=after_row['Mean']) # Mean
                sheet_combined.cell(row=row_idx, column=9, value=after_row['Std']) # Std
                sheet_combined.cell(row=row_idx, column=10, value=after_row['CV']) # CV
                # Apply alignment and border to all ten cells of the row
                for col_idx in range(1, 11):
                    cell = sheet_combined.cell(row=row_idx, column=col_idx)
                    cell.alignment = Alignment(horizontal='center', vertical='center')
                    cell.border = thin_border
                # Highlight the removed-count cell when samples were removed
                if abnormal_count > 0:
                    cell = sheet_combined.cell(row=row_idx, column=4) # removed-count column
                    cell.font = Font(bold=True, color="FF0000") # bold red
            # Column widths, keyed by header text
            combined_column_widths = {
                '指标': 15,
                '单位': 12,
                '剔除前样点数': 12,
                '剔除样点数': 12,
                '剔除后样点数': 12,
                'Min': 10,
                'Max': 10,
                'Mean': 10,
                'Std': 10,
                'CV': 10
            }
            for col_idx, col_name in enumerate(combined_headers, 1):
                column_letter = get_column_letter(col_idx)
                if col_name in combined_column_widths:
                    sheet_combined.column_dimensions[column_letter].width = combined_column_widths[col_name]
            print(f"\n✓ 融合统计完成: {len(after_stats)} 个指标")
            # Report how many indicators were matched; a row counts as matched
            # when its "count before" cell (column 3) is greater than 0
            if before_stats is not None:
                print(f" 剔除前找到 {len(before_stats)} 个指标")
                print(f" 剔除后找到 {len(after_stats)} 个指标")
                print(
                    f" 成功匹配 {len([i for i in range(2, len(after_stats) + 2) if sheet_combined.cell(row=i, column=3).value > 0])} 个指标的剔除前数据")
        else:
            print("✗ 剔除后未找到指定指标数据")
            sheet_combined = workbook.create_sheet(title="样点统计")
            sheet_combined.cell(row=1, column=1, value="未找到指定指标数据")
    else:
        print(f"✗ 剔除后文件夹不存在: {after_folder}")
        sheet_combined = workbook.create_sheet(title="样点统计")
        sheet_combined.cell(row=1, column=1, value="剔除后文件夹不存在")
    # Save the workbook
    workbook.save(output_path)
    print(f"\n" + "=" * 60)
    print(f"文件保存成功: {output_path}")
    print("=" * 60)
# ================ Usage example ================
if __name__ == "__main__":
    # Process one before/after pair of sample-point folders (hard-coded local paths)
    before_folder = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县20260127"
    after_folder = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县20260127剔除后"
    output_path = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县样点统计结果.xlsx"
    # Run
    create_statistics_excel(before_folder, after_folder, output_path)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,932 @@
# 生成完整的exactextract面积加权计算Python脚本
# -*- coding: utf-8 -*-
"""
土壤属性栅格数据面积加权统计脚本
基于exactextract库实现多属性栅格的面积加权平均值计算
最终输出格式与土壤属性图斑数据表一致
"""
import exactextract as ee
import geopandas as gpd
import rasterio
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
def init_logger():
    """Set up basic logging and return this module's logger.

    ``logging.basicConfig`` is called on every invocation with INFO level and
    a timestamped format; the logger returned is the one named after this
    module.
    """
    import logging

    log_settings = {
        'level': logging.INFO,
        'format': '%(asctime)s - %(levelname)s - %(message)s',
        'datefmt': '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**log_settings)
    module_logger = logging.getLogger(__name__)
    return module_logger
def validate_data(vector_path, raster_files):
    """
    Check that the inputs exist and have the expected form.

    The vector layer must be readable, non-empty and contain only Polygon or
    MultiPolygon geometries; every raster must be readable and single-band.
    Each failure is logged and reported through the return value.

    :param vector_path: path of the polygon vector file
    :param raster_files: dict mapping attribute name to raster path
    :return: True when every check passes, otherwise False
    """
    logger = init_logger()
    # The vector file must exist
    if not Path(vector_path).exists():
        logger.error(f"矢量文件不存在:{vector_path}")
        return False
    # Every raster file must exist
    for attr_name, raster_path in raster_files.items():
        if not Path(raster_path).exists():
            logger.error(f"栅格文件不存在:{attr_name} -> {raster_path}")
            return False
    # The vector layer must be readable
    try:
        gdf = gpd.read_file(vector_path)
    except Exception as e:
        logger.error(f"矢量文件读取失败:{str(e)}")
        return False
    # Inspect every feature, not just the first, and accept MultiPolygon.
    # An empty layer is rejected here instead of surfacing as a misleading
    # "read failed" IndexError.
    geometry_types = set(gdf.geom_type.dropna().unique())
    if not geometry_types or not geometry_types <= {'Polygon', 'MultiPolygon'}:
        logger.error("矢量文件必须是Polygon类型面要素")
        return False
    # Every raster, not only the first one, must be readable and single-band
    for attr_name, raster_path in raster_files.items():
        try:
            with rasterio.open(raster_path) as src:
                band_count = src.count
        except Exception as e:
            logger.error(f"栅格文件读取失败:{str(e)}")
            return False
        if band_count != 1:
            logger.error("每个栅格文件必须是单波段(每个属性单独一个栅格)")
            return False
    logger.info("所有输入数据验证通过")
    return True
def standardize_crs(gdf, raster_path):
    """
    Reproject the vector layer to the raster's coordinate reference system.

    :param gdf: vector GeoDataFrame
    :param raster_path: path of any one raster, used to obtain the target CRS
    :return: the GeoDataFrame in the raster's CRS (unchanged when they already agree)
    :raises ValueError: when the raster or the vector layer has no CRS defined;
        previously this surfaced as an AttributeError on ``None.to_string()``
    """
    logger = init_logger()
    # Read the raster CRS
    with rasterio.open(raster_path) as src:
        raster_crs = src.crs
    if raster_crs is None:
        raise ValueError(f"栅格文件缺少坐标参考系统(CRS):{raster_path}")
    # Read the vector CRS; without one the layer cannot be reprojected
    vector_crs = gdf.crs
    if vector_crs is None:
        raise ValueError("矢量数据缺少坐标参考系统(CRS),无法与栅格对齐")
    logger.info(f"当前矢量CRS{vector_crs.to_string()}")
    logger.info(f"目标栅格CRS{raster_crs.to_string()}")
    # Reproject only when the two differ
    if vector_crs != raster_crs:
        logger.warning("矢量与栅格CRS不一致正在进行转换...")
        gdf = gdf.to_crs(raster_crs)
        logger.info(f"CRS转换完成新矢量CRS{gdf.crs.to_string()}")
    return gdf
def calculate_area_weighted_stats(vector_path, raster_files, output_path):
    """
    Compute per-polygon raster means and export them as an Excel table.

    For every raster the ``mean`` operation of exactextract is evaluated per
    polygon. NOTE(review): per the exactextract documentation that mean is
    weighted by the fraction of each cell covered by the polygon; no separate
    ``weights`` argument is passed here.

    :param vector_path: path of the polygon vector file
    :param raster_files: dict mapping output column name to raster path
    :param output_path: path of the Excel file to write
    :return: the exported DataFrame
    """
    logger = init_logger()
    logger.info("开始面积加权统计计算")
    # 1. Load the vector layer
    logger.info(f"加载矢量数据:{vector_path}")
    gdf = gpd.read_file(vector_path)
    # 2. Align the vector CRS with the first raster
    test_raster = next(iter(raster_files.values()))
    gdf = standardize_crs(gdf, test_raster)
    # 3. Start the result table from the key vector attributes
    logger.info("初始化结果数据结构")
    # TODO: base attribute list, to be aligned with the target table layout
    base_fields = ["FID","DM","XZM","QSDWDM","QSDWMC","TL", "YL", "TS", "TZ", "DLBM", "DLMC"]
    # Share the vector layer's index from the start so that a missing first
    # field yields one NaN per polygon rather than a zero-row column.
    result_df = pd.DataFrame(index=gdf.index)
    for field in base_fields:
        if field in gdf.columns:
            result_df[field] = gdf[field]
        else:
            result_df[field] = np.nan
            logger.warning(f"矢量中缺少'{field}'字段,将生成空值")
    # 4. Polygon area in mu
    logger.info("计算图斑面积(单位:亩)")
    # geometry.area is expressed in squared CRS units; it is square metres
    # only for a metre-based projected CRS, so warn on a geographic CRS.
    if gdf.crs is not None and gdf.crs.is_geographic:
        logger.warning("当前CRS为地理坐标系,面积计算结果不是平方米,请使用投影坐标系")
    gdf["area_sqm"] = gdf.geometry.area
    # 1 mu is about 666.6667 square metres, i.e. 0.0015 mu per square metre
    result_df["面积亩"] = gdf["area_sqm"] * 0.0015
    result_df["面积亩"] = result_df["面积亩"].round(6)
    # 5. Per-polygon mean of every attribute raster
    logger.info("开始处理土壤属性栅格(面积加权平均)")
    total_attrs = len(raster_files)
    for attr_idx, (attr_name, raster_path) in enumerate(raster_files.items(), 1):
        logger.info(f"处理进度:{attr_idx}/{total_attrs} - 属性:{attr_name}")
        try:
            stats = ee.exact_extract(
                raster_path,
                gdf,
                ["mean"],
                output="pandas"
            )
            if stats is None:
                logger.warning(f"{attr_name}计算结果为空,可能无交集区域")
                result_df[attr_name] = np.nan
                continue
            # Make sure the result can be indexed by column name
            if not isinstance(stats, pd.DataFrame):
                stats = pd.DataFrame(stats)
            result_df[attr_name] = stats["mean"].round(4)
            # Polygons without raster coverage come back empty; fill with 0
            null_count = result_df[attr_name].isnull().sum()
            if null_count > 0:
                logger.warning(f"{attr_name}存在{null_count}个空值(图斑与栅格无交集)")
                result_df[attr_name] = result_df[attr_name].fillna(0)
        except Exception as e:
            logger.error(f"{attr_name}处理失败:{str(e)}")
            # Keep going with an empty column instead of aborting the run
            result_df[attr_name] = np.nan
    # 6. Add the grade columns
    logger.info("添加土壤属性分级字段")
    result_df = add_attribute_classification(result_df)
    # 7. Final column order
    logger.info("整理输出字段顺序")
    # TODO: confirm against the target table layout
    # Fixed: "有效铁分级" was listed twice and "有效磷分级" was missing, which
    # duplicated one grade column and dropped the other from the export.
    final_columns = [
        "FID","DM", "XZM", "QSDWDM", "QSDWMC",
        "TL", "YL", "TS", "TZ", "DLBM", "DLMC",
        "耕层厚度", "土壤容重", "砂粒", "粉粒", "黏粒", "酸碱度", "阳离子",
        "有机质", "全氮", "全磷", "全钾", "有效磷", "速效钾", "有效铁", "有效锰",
        "有效铜", "有效锌", "有效硼", "有效钼", "有效硫", "交换性钙", "交换性镁", "全硒",
        "有效土层厚度", "土壤质地",
        "耕层厚度分级", "土壤容重分级", "砂粒分级", "粉粒分级", "黏粒分级", "酸碱度分级", "阳离子分级",
        "有机质分级", "全氮分级", "全磷分级", "全钾分级", "有效磷分级", "速效钾分级", "有效铁分级", "有效锰分级",
        "有效铜分级", "有效锌分级", "有效硼分级", "有效钼分级", "有效硫分级", "交换性钙分级", "交换性镁分级", "全硒分级",
        "有效土层厚度分级", "土壤质地分级",
        "面积亩"
    ]
    # Create any column that has not been produced so the selection cannot fail
    for col in final_columns:
        if col not in result_df.columns:
            result_df[col] = np.nan
    result_df = result_df[final_columns]
    # 8. Export to Excel (.xlsx via openpyxl)
    logger.info(f"导出结果到:{output_path}")
    result_df.to_excel(output_path, index=False, engine="openpyxl")
    logger.info(f"结果导出完成,共生成{len(result_df)}条记录")
    return result_df
def add_attribute_classification(df):
"""
添加土壤属性分级字段(根据常见土壤分类标准实现)
可根据实际业务需求调整分级阈值
:param df: 包含原始属性的DataFrame
:return: 包含分级字段的DataFrame
"""
# todo
# 1. 酸碱度分级pH值 - 按PH标准
def classify_ph(ph):
"""
土壤pH值分级第三次全国土壤普查标准
标准等级:
等级一: 6.07.0
等级二: 7.07.5, 5.56.0
等级三: 7.58.0, 5.05.5
等级四: 8.08.5, 4.55.0
等级五: 8.5, ≤4.5
"""
if ph > 8.5 or ph <= 4.5:
return 5 # 等级五: 8.5, ≤4.5
elif (8.0 < ph <= 8.5) or (4.5 < ph <= 5.0):
return 4 # 等级四: 8.08.5, 4.55.0
elif (7.5 < ph <= 8.0) or (5.0 < ph <= 5.5):
return 3 # 等级三: 7.58.0, 5.05.5
elif (7.0 < ph <= 7.5) or (5.5 < ph <= 6.0):
return 2 # 等级二: 7.07.5, 5.56.0
elif 6.0 < ph <= 7.0:
return 1 # 等级一: 6.07.0
else:
return None # 异常值
# 2. 有机质分级单位g/kg - 按OM标准
def classify_organic(organic):
"""
土壤有机质分级(第三次全国土壤普查标准)
标准等级:
等级一: 35.0
等级二: 25.035.0
等级三: 15.025.0
等级四: 10.015.0
等级五: ≤10.0
"""
if organic > 35.0:
return 1 # 等级一: 35.0
elif 25.0 < organic <= 35.0:
return 2 # 等级二: 25.035.0
elif 15.0 < organic <= 25.0:
return 3 # 等级三: 15.025.0
elif 10.0 < organic <= 15.0:
return 4 # 等级四: 10.015.0
elif organic <= 10.0:
return 5 # 等级五: ≤10.0
else:
return None # 异常值
# 3. 阳离子交换量分级单位cmol/kg - 按CEC标准
def classify_cation(cation):
"""
土壤阳离子交换量分级(第三次全国土壤普查标准)
标准等级:
等级一: 30.0
等级二: 20.030.0
等级三: 15.020.0
等级四: 10.015.0
等级五: ≤10.0
"""
if cation > 30.0:
return 1 # 等级一: 30.0
elif 20.0 < cation <= 30.0:
return 2 # 等级二: 20.030.0
elif 15.0 < cation <= 20.0:
return 3 # 等级三: 15.020.0
elif 10.0 < cation <= 15.0:
return 4 # 等级四: 10.015.0
elif cation <= 10.0:
return 5 # 等级五: ≤10.0
else:
return None # 异常值
# 4. 有效磷分级单位mg/kg - 按AP标准
def classify_available_p(p):
"""
土壤有效磷分级(第三次全国土壤普查标准)
标准等级:
等级一: 40.0
等级二: 25.040.0
等级三: 15.025.0
等级四: 5.015.0
等级五: ≤5.0
"""
if p > 40.0:
return 1 # 等级一: 40.0
elif 25.0 < p <= 40.0:
return 2 # 等级二: 25.040.0
elif 15.0 < p <= 25.0:
return 3 # 等级三: 15.025.0
elif 5.0 < p <= 15.0:
return 4 # 等级四: 5.015.0
elif p <= 5.0:
return 5 # 等级五: ≤5.0
else:
return None # 异常值
# 5. 速效钾分级单位mg/kg - 按AK标准
def classify_available_k(k):
"""
土壤速效钾分级(第三次全国土壤普查标准)
标准等级:
等级一: 150
等级二: 100150
等级三: 75100
等级四: 5075
等级五: ≤50
"""
if k > 150:
return 1 # 等级一: 150
elif 100 < k <= 150:
return 2 # 等级二: 100150
elif 75 < k <= 100:
return 3 # 等级三: 75100
elif 50 < k <= 75:
return 4 # 等级四: 5075
elif k <= 50:
return 5 # 等级五: ≤50
else:
return None # 异常值
# 6. 耕层厚度分级单位cm - 按GZCHD标准
def classify_soil_depth(depth):
"""
土壤耕作层厚度分级(第三次全国土壤普查标准)
标准等级:
等级一: 25.0
等级二: 20.025.0
等级三: 15.020.0
等级四: 10.015.0
等级五: ≤10.0
"""
if depth > 25.0:
return 1 # 等级一: 25.0
elif 20.0 < depth <= 25.0:
return 2 # 等级二: 20.025.0
elif 15.0 < depth <= 20.0:
return 3 # 等级三: 15.020.0
elif 10.0 < depth <= 15.0:
return 4 # 等级四: 10.015.0
elif depth <= 10.0:
return 5 # 等级五: ≤10.0
else:
return None # 异常值
# 7. 土壤容重分级单位g/cm³ - 按TRRZ标准
def classify_bulk_density(density):
"""
土壤容重分级(第三次全国土壤普查标准)
标准等级:
等级一: 1.101.25
等级二: 1.251.35, 1.001.10
等级三: 1.351.45
等级四: 1.451.55, 0.901.00
等级五: 1.55, ≤0.90
"""
if 1.10 < density <= 1.25:
return 1 # 等级一: 1.101.25
elif (1.25 < density <= 1.35) or (1.00 < density <= 1.10):
return 2 # 等级二: 1.251.35, 1.001.10
elif 1.35 < density <= 1.45:
return 3 # 等级三: 1.351.45
elif (1.45 < density <= 1.55) or (0.90 < density <= 1.00):
return 4 # 等级四: 1.451.55, 0.901.00
elif density > 1.55 or density <= 0.90:
return 5 # 等级五: 1.55, ≤0.90
else:
return None # 异常值
# 8. 全氮分级单位g/kg - 按TN标准
def classify_total_n(n):
"""
土壤全氮分级(第三次全国土壤普查标准)
标准等级:
等级一: 2.00
等级二: 1.502.00
等级三: 1.001.50
等级四: 0.501.00
等级五: ≤0.50
"""
if n > 2.00:
return 1 # 等级一: 2.00
elif 1.50 < n <= 2.00:
return 2 # 等级二: 1.502.00
elif 1.00 < n <= 1.50:
return 3 # 等级三: 1.001.50
elif 0.50 < n <= 1.00:
return 4 # 等级四: 0.501.00
elif n <= 0.50:
return 5 # 等级五: ≤0.50
else:
return None # 异常值
# 9. 全磷分级单位g/kg - 按TP标准
def classify_total_p(p):
"""
土壤全磷分级(第三次全国土壤普查标准)
标准等级:
等级一: 1.00
等级二: 0.801.00
等级三: 0.600.80
等级四: 0.400.60
等级五: ≤0.40
"""
if p > 1.00:
return 1 # 等级一: 1.00
elif 0.80 < p <= 1.00:
return 2 # 等级二: 0.801.00
elif 0.60 < p <= 0.80:
return 3 # 等级三: 0.600.80
elif 0.40 < p <= 0.60:
return 4 # 等级四: 0.400.60
elif p <= 0.40:
return 5 # 等级五: ≤0.40
else:
return None # 异常值
# 10. 全钾分级单位g/kg - 按TK标准
def classify_total_k(k):
"""
土壤全钾分级(第三次全国土壤普查标准)
标准等级:
等级一: 20.0
等级二: 15.020.0
等级三: 10.015.0
等级四: 5.010.0
等级五: ≤5.0
"""
if k > 20.0:
return 1 # 等级一: 20.0
elif 15.0 < k <= 20.0:
return 2 # 等级二: 15.020.0
elif 10.0 < k <= 15.0:
return 3 # 等级三: 10.015.0
elif 5.0 < k <= 10.0:
return 4 # 等级四: 5.010.0
elif k <= 5.0:
return 5 # 等级五: ≤5.0
else:
return None # 异常值
# 11. 有效铁分级单位mg/kg
def classify_available_fe(fe):
"""
土壤有效铁分级(第三次全国土壤普查标准)
参数:
fe: 有效铁含量 (mg/kg)
返回:
分级等级 (1-5)
"""
if fe <= 3.0:
return 5 # 等级五: ≤3.0
elif 3.0 < fe <= 5.0:
return 4 # 等级四: 3.05.0
elif 5.0 < fe <= 10.0:
return 3 # 等级三: 5.010.0
elif 10.0 < fe <= 20.0:
return 2 # 等级二: 10.020.0
else: # fe > 20.0
return 1 # 等级一: 20.0
# 12. 有效锌分级单位mg/kg
def classify_available_zn(zn):
"""
土壤有效锌分级(第三次全国土壤普查标准)
参数:
zn: 有效锌含量 (mg/kg)
返回:
分级等级 (1-5)
"""
if zn <= 0.20:
return 5 # 等级五: ≤0.20
elif 0.20 < zn <= 0.50:
return 4 # 等级四: 0.200.50
elif 0.50 < zn <= 1.00:
return 3 # 等级三: 0.501.00
elif 1.00 < zn <= 3.00:
return 2 # 等级二: 1.003.00
else: # zn > 3.00
return 1 # 等级一: 3.00
# 13. 有效锰分级单位mg/kg
def classify_available_mn(mn):
"""
土壤有效锰分级(第三次全国土壤普查标准)
参数:
mn: 有效锰含量 (mg/kg)
返回:
分级等级 (1-5)
"""
if mn <= 1.0:
return 5 # 等级五: ≤1.0
elif 1.0 < mn <= 5.0:
return 4 # 等级四: 1.05.0
elif 5.0 < mn <= 15.0:
return 3 # 等级三: 5.015.0
elif 15.0 < mn <= 30.0:
return 2 # 等级二: 15.030.0
else: # mn > 30.0
return 1 # 等级一: 30.0
# 14. 有效铜分级单位mg/kg
def classify_available_cu(cu):
"""
土壤有效铜分级(第三次全国土壤普查标准)
参数:
cu: 有效铜含量 (mg/kg)
返回:
分级等级 (1-5)
"""
if cu <= 0.20:
return 5 # 等级五: ≤0.20
elif 0.20 < cu <= 0.50:
return 4 # 等级四: 0.200.50
elif 0.50 < cu <= 1.00:
return 3 # 等级三: 0.501.00
elif 1.00 < cu <= 2.00:
return 2 # 等级二: 1.002.00
else: # cu > 2.00
return 1 # 等级一: 2.00
# 15. 有效硼分级单位mg/kg
def classify_available_b(b):
"""
土壤有效硼分级(第三次全国土壤普查标准)
参数:
b: 有效硼含量 (mg/kg)
返回:
分级等级 (1-5)
"""
if b <= 0.20:
return 5 # 等级五: ≤0.20
elif 0.20 < b <= 0.50:
return 4 # 等级四: 0.200.50
elif 0.50 < b <= 0.80:
return 3 # 等级三: 0.500.80
elif 0.80 < b <= 1.00:
return 2 # 等级二: 0.801.00
else: # b > 1.00
return 1 # 等级一: 1.00
# 16. Available molybdenum grading (unit: mg/kg)
def classify_available_mo(mo):
    """
    Grade soil available molybdenum (Third National Soil Census standard).

    Args:
        mo: available molybdenum content (mg/kg)
    Returns:
        Grade 1-5, grade 1 being the highest content:
        1: > 0.20 | 2: (0.15, 0.20] | 3: (0.10, 0.15] | 4: (0.05, 0.10] | 5: <= 0.05
    """
    # Guard clauses ordered from the lowest class up; each bound is inclusive.
    if mo <= 0.05:
        return 5
    if mo <= 0.10:
        return 4
    if mo <= 0.15:
        return 3
    if mo <= 0.20:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 17. Available sulfur grading (unit: mg/kg)
def classify_available_s(s):
    """
    Grade soil available sulfur (Third National Soil Census standard).

    Args:
        s: available sulfur content (mg/kg)
    Returns:
        Grade 1-5, grade 1 being the highest content:
        1: > 40.0 | 2: (30.0, 40.0] | 3: (20.0, 30.0] | 4: (10.0, 20.0] | 5: <= 10.0
    """
    # Guard clauses ordered from the lowest class up; each bound is inclusive.
    if s <= 10.0:
        return 5
    if s <= 20.0:
        return 4
    if s <= 30.0:
        return 3
    if s <= 40.0:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 18. Exchangeable calcium grading (unit: cmol(½Ca²⁺)/kg)
def classify_exchangeable_ca(ca):
    """
    Grade soil exchangeable calcium (Third National Soil Census standard).

    Args:
        ca: exchangeable calcium content (cmol(½Ca²⁺)/kg)
    Returns:
        Grade 1-5, grade 1 being the highest content:
        1: > 7.49 | 2: (4.99, 7.49] | 3: (2.50, 4.99] | 4: (1.00, 2.50] | 5: <= 1.00
    """
    # Guard clauses ordered from the lowest class up; each bound is inclusive.
    if ca <= 1.00:
        return 5
    if ca <= 2.50:
        return 4
    if ca <= 4.99:
        return 3
    if ca <= 7.49:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 19. Exchangeable magnesium grading (unit: cmol(½Mg²⁺)/kg)
def classify_exchangeable_mg(mg):
    """
    Grade soil exchangeable magnesium (Third National Soil Census standard).

    Args:
        mg: exchangeable magnesium content (cmol(½Mg²⁺)/kg)
    Returns:
        Grade 1-5, grade 1 being the highest content:
        1: > 1.64 | 2: (1.23, 1.64] | 3: (0.82, 1.23] | 4: (0.41, 0.82] | 5: <= 0.41
    """
    # Guard clauses ordered from the lowest class up; each bound is inclusive.
    if mg <= 0.41:
        return 5
    if mg <= 0.82:
        return 4
    if mg <= 1.23:
        return 3
    if mg <= 1.64:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 20. Total selenium grading (unit: mg/kg)
def classify_total_se(se):
    """
    Grade soil total selenium (Third National Soil Census standard).

    Unlike the neighbouring graders, this scale has only four classes.

    Args:
        se: total selenium content (mg/kg)
    Returns:
        Grade 1-4, grade 1 being the highest content:
        1: > 3.00 | 2: (0.40, 3.00] | 3: (0.17, 0.40] | 4: <= 0.17
    """
    # Guard clauses ordered from the lowest class up; each bound is inclusive.
    if se <= 0.17:
        return 4
    if se <= 0.40:
        return 3
    if se <= 3.00:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 21. Silt content grading (unit: %)
def classify_silt(silt):
    """
    Grade soil silt content (Third National Soil Census standard).

    Args:
        silt: silt content (%)
    Returns:
        Grade 1-5; here grade 5 is the highest content:
        5: > 75 | 4: (45, 75] | 3: (30, 45] | 2: (15, 30] | 1: <= 15
    """
    # (exclusive lower bound, grade) pairs, scanned from the highest class down.
    for lower, grade in ((75, 5), (45, 4), (30, 3), (15, 2)):
        if silt > lower:
            return grade
    # At or below every bound. NaN also lands here because every comparison is False.
    return 1
# 22. Clay content grading (unit: %)
def classify_clay(clay):
    """
    Grade soil clay content (Third National Soil Census standard).

    Args:
        clay: clay content (%)
    Returns:
        Grade 1-5; here grade 5 is the highest content:
        5: > 65 | 4: (45, 65] | 3: (25, 45] | 2: (15, 25] | 1: <= 15
    """
    # (exclusive lower bound, grade) pairs, scanned from the highest class down.
    for lower, grade in ((65, 5), (45, 4), (25, 3), (15, 2)):
        if clay > lower:
            return grade
    # At or below every bound. NaN also lands here because every comparison is False.
    return 1
# 23. Sand content grading (unit: %)
def classify_sand(sand):
    """
    Grade soil sand content (Third National Soil Census standard).

    Args:
        sand: sand content (%)
    Returns:
        Grade 1-5; here grade 5 is the highest content:
        5: > 85 | 4: (55, 85] | 3: (40, 55] | 2: (30, 40] | 1: <= 30
    """
    # (exclusive lower bound, grade) pairs, scanned from the highest class down.
    for lower, grade in ((85, 5), (55, 4), (40, 3), (30, 2)):
        if sand > lower:
            return grade
    # At or below every bound. NaN also lands here because every comparison is False.
    return 1
# 24. Effective soil layer thickness grading (unit: cm)
def classify_yxtchd(depth):
    """
    Grade effective soil layer thickness (Third National Soil Census standard).

    Args:
        depth: effective soil layer thickness (cm)
    Returns:
        Grade 1-5, grade 1 being the thickest layer:
        1: > 100 | 2: (80, 100] | 3: (60, 80] | 4: (40, 60] | 5: <= 40
    """
    # Guard clauses ordered from the thinnest class up; each bound is inclusive.
    if depth <= 40:
        return 5
    if depth <= 60:
        return 4
    if depth <= 80:
        return 3
    if depth <= 100:
        return 2
    # Above every bound. NaN also lands here because every comparison is False.
    return 1
# 25. Soil texture
def classify_trzd(trzd):
    """
    Map a soil texture class value onto its integer grade.

    The input is rounded to the nearest whole number first (Python's round(),
    which resolves exact .5 ties to the even neighbour), so fractional values
    snap to a class code.

    Args:
        trzd: soil texture class value (expected range 1-5)
    Returns:
        Grade 1-5: the rounded value itself when it is 2, 3, 4 or 5;
        any other value (including anything outside 1-5) yields 1.
    """
    code = round(trzd, 0)
    # Codes 2-5 map to themselves; everything else falls back to grade 1.
    for grade in (5, 4, 3, 2):
        if code == grade:
            return grade
    return 1
# 应用分级函数(只处理非空值)
if "酸碱度" in df.columns:
df["酸碱度分级"] = df["酸碱度"].apply(lambda x: classify_ph(x) if pd.notna(x) else np.nan)
if "有机质" in df.columns:
df["有机质分级"] = df["有机质"].apply(lambda x: classify_organic(x) if pd.notna(x) else np.nan)
if "阳离子" in df.columns:
df["阳离子分级"] = df["阳离子"].apply(lambda x: classify_cation(x) if pd.notna(x) else np.nan)
if "有效磷" in df.columns:
df["有效磷分级"] = df["有效磷"].apply(lambda x: classify_available_p(x) if pd.notna(x) else np.nan)
if "速效钾" in df.columns:
df["速效钾分级"] = df["速效钾"].apply(lambda x: classify_available_k(x) if pd.notna(x) else np.nan)
if "耕层厚度" in df.columns:
df["耕层厚度分级"] = df["耕层厚度"].apply(lambda x: classify_soil_depth(x) if pd.notna(x) else np.nan)
if "土壤容重" in df.columns:
df["土壤容重分级"] = df["土壤容重"].apply(lambda x: classify_bulk_density(x) if pd.notna(x) else np.nan)
if "全氮" in df.columns:
df["全氮分级"] = df["全氮"].apply(lambda x: classify_total_n(x) if pd.notna(x) else np.nan)
if "全磷" in df.columns:
df["全磷分级"] = df["全磷"].apply(lambda x: classify_total_p(x) if pd.notna(x) else np.nan)
if "全钾" in df.columns:
df["全钾分级"] = df["全钾"].apply(lambda x: classify_total_k(x) if pd.notna(x) else np.nan)
if "有效铁" in df.columns:
df["有效铁分级"] = df["有效铁"].apply(lambda x: classify_available_fe(x) if pd.notna(x) else np.nan)
if "有效锌" in df.columns:
df["有效锌分级"] = df["有效锌"].apply(lambda x: classify_available_zn(x) if pd.notna(x) else np.nan)
if "有效锰" in df.columns:
df["有效锰分级"] = df["有效锰"].apply(lambda x: classify_available_mn(x) if pd.notna(x) else np.nan)
if "有效铜" in df.columns:
df["有效铜分级"] = df["有效铜"].apply(lambda x: classify_available_cu(x) if pd.notna(x) else np.nan)
if "有效硼" in df.columns:
df["有效硼分级"] = df["有效硼"].apply(lambda x: classify_available_b(x) if pd.notna(x) else np.nan)
if "有效钼" in df.columns:
df["有效钼分级"] = df["有效钼"].apply(lambda x: classify_available_mo(x) if pd.notna(x) else np.nan)
if "有效硫" in df.columns:
df["有效硫分级"] = df["有效硫"].apply(lambda x: classify_available_s(x) if pd.notna(x) else np.nan)
if "交换性钙" in df.columns:
df["交换性钙分级"] = df["交换性钙"].apply(lambda x: classify_exchangeable_ca(x) if pd.notna(x) else np.nan)
if "交换性镁" in df.columns:
df["交换性镁分级"] = df["交换性镁"].apply(lambda x: classify_exchangeable_mg(x) if pd.notna(x) else np.nan)
if "全硒" in df.columns:
df["全硒分级"] = df["全硒"].apply(lambda x: classify_total_se(x) if pd.notna(x) else np.nan)
if "粉粒" in df.columns:
df["粉粒分级"] = df["粉粒"].apply(lambda x: classify_silt(x) if pd.notna(x) else np.nan)
if "黏粒" in df.columns:
df["黏粒分级"] = df["黏粒"].apply(lambda x: classify_clay(x) if pd.notna(x) else np.nan)
if "砂粒" in df.columns:
df["砂粒分级"] = df["砂粒"].apply(lambda x: classify_sand(x) if pd.notna(x) else np.nan)
if "有效土层厚度" in df.columns:
df["有效土层厚度分级"] = df["有效土层厚度"].apply(lambda x: classify_yxtchd(x) if pd.notna(x) else np.nan)
if "土壤质地" in df.columns:
df["土壤质地分级"] = df["土壤质地"].apply(lambda x: classify_trzd(x) if pd.notna(x) else np.nan)
return df
def main():
    """
    Program entry point.

    Validates the configured inputs, runs the area-weighted statistics and
    logs a three-row preview of the result. The vector path, raster paths and
    output path are hard-coded in the configuration section below and must be
    edited to match the local data.

    Returns:
        None. A failed validation ends the run early; any exception raised
        during processing is logged with its traceback instead of propagating.
    """
    logger = init_logger()
    banner = "=" * 50
    logger.info(banner)
    logger.info("土壤属性栅格面积加权统计程序启动")
    logger.info(banner)

    # --------------------------
    # User configuration (must be edited!)
    # --------------------------
    # 1. Vector polygon file path (Shapefile, GeoPackage, ... per the original note)
    # TODO
    VECTOR_PATH = r"D:\工作\三普成果编制\出图数据\北海\三普栅格\DL_ALL.shp"  # e.g. "D:/data/soil_polygons.shp"

    # 2. Soil attribute rasters (key: attribute name, value: raster file path)
    # NOTE: each attribute name must match the column name in the final table.
    # TODO
    RASTER_FILES = {
        "耕层厚度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\GZCHD.tif",  # plough layer thickness
        "土壤容重": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TRRZ.tif",  # soil bulk density
        "砂粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\SL.tif",  # sand content
        "粉粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\FL.tif",  # silt content
        "黏粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\NL.tif",  # clay content
        "酸碱度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\PH.tif",  # pH
        "阳离子": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\CEC.tif",  # cation exchange capacity
        "有机质": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\OM.tif",  # organic matter
        "全氮": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TN.tif",  # total nitrogen
        "全磷": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TP.tif",  # total phosphorus
        "全钾": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TK.tif",  # total potassium
        "有效磷": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AP.tif",  # available phosphorus
        "速效钾": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AK.tif",  # readily available potassium
        # "有效铁": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AFE.tif",  # available iron (disabled)
        "有效锌": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AZN.tif",  # available zinc
        "有效锰": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AMN.tif",  # available manganese
        "有效铜": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\ACU.tif",  # available copper
        "有效硼": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AB.tif",  # available boron
        "有效钼": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AMO.tif",  # available molybdenum
        "有效硫": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AS1.tif",  # available sulfur
        "交换性钙": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\ECA.tif",  # exchangeable calcium
        "交换性镁": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\EMG.tif",  # exchangeable magnesium
        "全硒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TSE.tif",  # total selenium
        "有效土层厚度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\YXTCHD.tif",  # effective soil layer thickness
        "土壤质地": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TRZD.tif",  # soil texture
    }

    # 3. Result output path (Excel file)
    OUTPUT_PATH = "土壤属性图斑数据_面积加权结果.xlsx"  # e.g. "D:/result/result.xlsx"

    # --------------------------
    # Execution flow (no changes needed)
    # --------------------------
    try:
        # 1. Validate the inputs; a falsy result aborts the run.
        if not validate_data(VECTOR_PATH, RASTER_FILES):
            logger.error("数据验证失败,程序终止")
            return

        # 2. Run the area-weighted statistics.
        # NOTE(review): presumably this also writes OUTPUT_PATH - confirm in the helper.
        result_df = calculate_area_weighted_stats(VECTOR_PATH, RASTER_FILES, OUTPUT_PATH)

        # 3. Show a preview of the result.
        # A real newline is used here; the previous "\\n" literal logged the
        # two characters backslash + n instead of starting a new line.
        logger.info("\n结果预览前3行")
        print(result_df.head(3).to_string(index=False))
        logger.info("\n" + banner)
        logger.info("程序执行完成!")
        logger.info(f"结果文件:{OUTPUT_PATH}")
        logger.info(banner)
    except Exception as e:
        # Top-level boundary: log the failure with its traceback and stop.
        logger.error(f"程序执行出错:{str(e)}", exc_info=True)
        logger.error("程序异常终止")
if __name__ == "__main__":
    # Start the main program only when this file is run as a script.
    main()