refactor: 重构项目结构，将geo_tools重命名为app并更新相关引用

- 将主包名从geo_tools改为app - 更新所有模块中的引用路径 - 迁移并更新测试用例 - 添加项目规则文档 - 保持原有功能不变，仅进行结构调整
2026-04-12 19:49:56 +08:00
parent fcb8e1f255
commit db51d41aef
41 changed files with 4132 additions and 808 deletions
--- a/scripts/example_workflow.py
+++ b/scripts/example_workflow.py
@@ -17,8 +17,8 @@ from pathlib import Path
 import sys
 sys.path.insert(0, str(Path(__file__).parent.parent))

-import geo_tools
-from geo_tools.utils.logger import get_logger
+import app
+from app.utils.logger import get_logger

 logger = get_logger("example_workflow")

@@ -29,37 +29,37 @@ OUTPUT_DIR.mkdir(exist_ok=True)

 def main() -> None:
    logger.info("=" * 60)
-    logger.info("geo_tools 端到端工作流示例  v%s", geo_tools.__version__)
+    logger.info("geo_tools 端到端工作流示例  v%s", app.__version__)
    logger.info("=" * 60)

    # ── 1. 读取示例点数据 ──────────────────────────────────────────
    logger.info("\n[步骤 1] 读取示例点数据（GeoJSON）")
-    points = geo_tools.read_vector(DATA_DIR / "sample_points.geojson")
+    points = app.read_vector(DATA_DIR / "sample_points.geojson")
    logger.info("  读取完成：%d 条要素，CRS=%s", len(points), points.crs)
    logger.info("  字段：%s", list(points.columns))

    # ── 2. 读取示例面数据 ──────────────────────────────────────────
    logger.info("\n[步骤 2] 读取示例区域多边形（GeoJSON）")
-    regions = geo_tools.read_vector(DATA_DIR / "sample_regions.geojson")
+    regions = app.read_vector(DATA_DIR / "sample_regions.geojson")
    logger.info("  区域列表：%s", regions["name"].tolist())

    # ── 3. 数据校验 ───────────────────────────────────────────────
    logger.info("\n[步骤 3] 几何有效性校验")
-    stats = geo_tools.validate_geometry(points)
+    stats = app.validate_geometry(points)
    logger.info("  点数据校验结果：%s", stats)
-    stats = geo_tools.validate_geometry(regions)
+    stats = app.validate_geometry(regions)
    logger.info("  面数据校验结果：%s", stats)

    # ── 4. 坐标系信息 ─────────────────────────────────────────────
    logger.info("\n[步骤 4] 查询 CRS 信息")
-    crs_info = geo_tools.get_crs_info("EPSG:4326")
+    crs_info = app.get_crs_info("EPSG:4326")
    logger.info("  WGS84 信息：%s", crs_info)
-    proj_crs = geo_tools.suggest_projected_crs(116.4, 39.9)
+    proj_crs = app.suggest_projected_crs(116.4, 39.9)
    logger.info("  北京适合的投影 CRS：%s", proj_crs)

    # ── 5. 重投影 ─────────────────────────────────────────────────
    logger.info("\n[步骤 5] 重投影到 Web Mercator（用于可视化）")
-    points_3857 = geo_tools.reproject(points, "EPSG:3857")
+    points_3857 = app.reproject(points, "EPSG:3857")
    logger.info("  重投影完成：CRS=%s", points_3857.crs)

    # ── 6. 面积加权均值 ───────────────────────────────────────────
@@ -68,31 +68,31 @@ def main() -> None:
    points_buffered = points.to_crs("EPSG:3857").copy()
    points_buffered["geometry"] = points_buffered.geometry.buffer(100_000)  # 100km缓冲
    points_buffered = points_buffered.to_crs("EPSG:4326")
-    from geo_tools.analysis.stats import area_weighted_mean
+    from app.analysis.stats import area_weighted_mean
    aw_result = area_weighted_mean(points_buffered, value_col="value")
    logger.info("  全局面积加权均值：%.4f", aw_result["area_weighted_mean"])

    # ── 7. 按位置选择 ─────────────────────────────────────────────
    logger.info("\n[步骤 7] 按位置选择：筛选华南区域内的城市")
    hua_nan = regions[regions["name"] == "华南"]
-    points_in_huanan = geo_tools.select_by_location(points, hua_nan, predicate="intersects")
+    points_in_huanan = app.select_by_location(points, hua_nan, predicate="intersects")
    logger.info("  华南区域内的城市：%s", points_in_huanan["name"].tolist())

    # ── 8. 统计汇总 ───────────────────────────────────────────────
    logger.info("\n[步骤 8] 属性统计汇总")
-    from geo_tools.analysis.stats import summarize_attributes
+    from app.analysis.stats import summarize_attributes
    summary = summarize_attributes(points, columns=["value"], group_col="category")
    logger.info("  按分类汇总：\n%s", summary.to_string(index=False))

    # ── 9. 写出结果 ───────────────────────────────────────────────
    logger.info("\n[步骤 9] 写出处理结果")
    out_geojson = OUTPUT_DIR / "result_points_3857.geojson"
-    geo_tools.write_vector(points_3857, out_geojson)
+    app.write_vector(points_3857, out_geojson)
    logger.info("  GeoJSON 写出：%s", out_geojson)

    out_gpkg = OUTPUT_DIR / "results.gpkg"
-    geo_tools.write_gpkg(points, out_gpkg, layer="original_points")
-    geo_tools.write_gpkg(regions, out_gpkg, layer="regions", mode="a")
+    app.write_gpkg(points, out_gpkg, layer="original_points")
+    app.write_gpkg(regions, out_gpkg, layer="regions", mode="a")
    logger.info("  GPKG 写出（2 图层）：%s", out_gpkg)

    logger.info("\n" + "=" * 60)
--- a/scripts/其他工具/A耕作层厚度栅格制作_新1.py
+++ b/scripts/其他工具/A耕作层厚度栅格制作_新1.py
@@ -0,0 +1,188 @@
+import os
+from pathlib import Path
+import time
+
+import geopandas as gpd
+from geopandas.io import file
+import pandas as pd
+import numpy as np
+
+def assign_gzchd_flexible_v2(soil_prop, point_path, polygon_path, output_path):
+    """Transfer a sample-point attribute onto polygons via a nearest-point cascade.
+
+    Each polygon receives the ``soil_prop`` value of the nearest point that
+    shares its ``TZ`` value. Polygons still unmatched are retried on ``TS``,
+    then ``YL``, then ``TL``; whatever is left takes the globally nearest
+    point. The merged result is written to ``output_path`` and returned.
+
+    Args:
+        soil_prop: Name of the point column to transfer (e.g. ``"YXTCHD"``).
+        point_path: Path of the sample-point dataset, read with geopandas.
+        polygon_path: Path of the polygon dataset, read with geopandas.
+        output_path: Destination passed to ``GeoDataFrame.to_file``.
+
+    Returns:
+        The polygon GeoDataFrame carrying a ``soil_prop`` column; polygons
+        without any match get 0.
+
+    Raises:
+        ValueError: If the points have neither a ``TZ`` nor a ``TDLYLX`` column.
+    """
+    print("正在读取数据...")
+    points = gpd.read_file(point_path)
+    polygons = gpd.read_file(polygon_path)
+
+    # 1. Reproject the points into the polygon CRS so both layers are comparable.
+    if points.crs != polygons.crs:
+        print("坐标系不一致，正在转换点数据...")
+        points = points.to_crs(polygons.crs)
+
+    # 2. Normalise the primary matching key. Points without TZ fall back to
+    #    TDLYLX; if neither column exists the cascade cannot start.
+    # NOTE(review): astype(str) turns missing values into literal strings
+    # (e.g. "nan"), so the helper's notna() filter no longer excludes them for
+    # TZ — confirm whether that is intended.
+    if 'TZ' not in points.columns:
+        if 'TDLYLX' not in points.columns:
+            raise ValueError("点要素类中不存在TZ或TDLYLX字段，无法进行匹配！")
+        points['TZ'] = points['TDLYLX'].astype(str).str.strip()
+    else:
+        points['TZ'] = points['TZ'].astype(str).str.strip()
+
+    # The polygon layer may lack TZ. The helper below already tolerates a
+    # missing attribute, so skip the normalisation instead of raising KeyError
+    # and let the cascade fall through to TS / YL / TL / global nearest.
+    if 'TZ' in polygons.columns:
+        polygons['TZ'] = polygons['TZ'].astype(str).str.strip()
+
+    # Coerce the transferred attribute to numeric; unparsable or missing
+    # values become 0 so later concatenation sees a single dtype.
+    points[soil_prop] = pd.to_numeric(points[soil_prop], errors='coerce').fillna(0)
+
+    # Drop a same-named polygon column so the join result has one soil_prop.
+    if soil_prop in polygons.columns:
+        polygons = polygons.drop(columns=[soil_prop])
+
+    def match_by_attribute(poly_gdf, pt_gdf, attr_name, suffix):
+        """Nearest-point join restricted to rows sharing the same ``attr_name`` value.
+
+        Returns ``(matched_gdf, matched_index_list)``, or ``(None, [])`` when
+        the attribute is missing on either side or no value is shared.
+        """
+        if attr_name not in poly_gdf.columns or attr_name not in pt_gdf.columns:
+            return None, []
+
+        poly_sub = poly_gdf[poly_gdf[attr_name].notna()].copy()
+        point_sub = pt_gdf[pt_gdf[attr_name].notna()].copy()
+
+        if poly_sub.empty or point_sub.empty:
+            return None, []
+
+        poly_sub[attr_name] = poly_sub[attr_name].astype(str).str.strip()
+        point_sub[attr_name] = point_sub[attr_name].astype(str).str.strip()
+
+        common_values = set(poly_sub[attr_name].unique()) & set(point_sub[attr_name].unique())
+        if not common_values:
+            return None, []
+
+        matched_parts = []
+        matched_ids = []
+        for value in common_values:
+            poly_part = poly_sub[poly_sub[attr_name] == value].copy()
+            point_part = point_sub[point_sub[attr_name] == value][[soil_prop, 'geometry']].copy()
+            if poly_part.empty or point_part.empty:
+                continue
+
+            matched_part = gpd.sjoin_nearest(poly_part, point_part, how='left', rsuffix=suffix)
+            # Equidistant points yield several rows per polygon; keep one.
+            matched_part = matched_part[~matched_part.index.duplicated(keep='first')]
+            if not matched_part.empty:
+                matched_parts.append(matched_part)
+                matched_ids.extend(matched_part.index.tolist())
+
+        if matched_parts:
+            return pd.concat(matched_parts), matched_ids
+        return None, []
+
+    matched_results = []
+    matched_indices = []
+
+    # --- Step 1: nearest point with the same TZ ---
+    print("步骤 1: 正在匹配相同 TZ 的最近点...")
+    first_matched, first_ids = match_by_attribute(polygons, points, 'TZ', '_p1')
+    if first_matched is not None and not first_matched.empty:
+        matched_results.append(first_matched)
+        matched_indices.extend(first_ids)
+
+    # --- Steps 2-4: retry the unmatched polygons on TS, then YL, then TL ---
+    # Each entry: (attribute, join suffix, message printed when nothing matched).
+    fallback_steps = [
+        ('TS', '_p_ts', "未匹配到 TS 类型要素，继续下一步。"),
+        ('YL', '_p_yl', "未匹配到 YL 类型要素，继续下一步。"),
+        ('TL', '_p_tl', "未匹配到 TL 类型要素，继续全局最近点。"),
+    ]
+    unmatched_mask = ~polygons.index.isin(matched_indices)
+    remaining_polygons = polygons[unmatched_mask].copy()
+    print(f"步骤 2: 正在为 {len(remaining_polygons)} 个要素匹配 TS 最近点...")
+
+    if remaining_polygons.empty:
+        print("没有未匹配的面要素，跳过 TS/YL/TL 匹配。")
+    else:
+        for step_no, (attr, suffix, miss_msg) in enumerate(fallback_steps, start=2):
+            if remaining_polygons.empty:
+                break
+            # The step-2 banner was already printed above, before the emptiness check.
+            if step_no > 2:
+                print(f"步骤 {step_no}: 正在为 {len(remaining_polygons)} 个要素匹配 {attr} 最近点...")
+            step_matched, step_ids = match_by_attribute(remaining_polygons, points, attr, suffix)
+            if step_matched is not None and not step_matched.empty:
+                matched_results.append(step_matched)
+                matched_indices.extend(step_ids)
+                remaining_polygons = polygons[~polygons.index.isin(matched_indices)].copy()
+                print(f"已匹配 {attr}: {len(step_ids)} 个要素，剩余 {len(remaining_polygons)} 个。")
+            else:
+                print(miss_msg)
+
+    # --- Final step: globally nearest point for whatever is still unmatched ---
+    unmatched_mask = ~polygons.index.isin(matched_indices)
+    remaining_polygons = polygons[unmatched_mask].copy()
+    print(f"最后一步: 正在为 {len(remaining_polygons)} 个要素匹配全局最近点...")
+
+    if not remaining_polygons.empty:
+        point_pool = points[[soil_prop, 'geometry']]
+        step_final = gpd.sjoin_nearest(remaining_polygons, point_pool, how='left', rsuffix='_p2')
+        step_final = step_final[~step_final.index.duplicated(keep='first')]
+        matched_results.append(step_final)
+
+    # --- Merge the partial results ---
+    print("正在合并数据...")
+    # Skip None / empty frames so pd.concat does not emit a FutureWarning.
+    to_concat = [res for res in matched_results if res is not None and not res.empty]
+
+    if to_concat:
+        final_gdf = pd.concat(to_concat)
+    else:
+        # Nothing matched at all: return the polygons with a zero-filled column.
+        final_gdf = polygons.copy()
+        final_gdf[soil_prop] = 0
+
+    # --- Clean up and save ---
+    # Remove the helper columns the joins introduced (index_* and suffixed names).
+    temp_markers = ('index_', '_p1', '_p2', '_p_ts', '_p_yl', '_p_tl')
+    cols_to_drop = [
+        c for c in final_gdf.columns
+        if any(marker in c for marker in temp_markers)
+    ]
+    final_gdf = final_gdf.drop(columns=cols_to_drop)
+
+    # Keep only the first occurrence of any duplicated column name.
+    final_gdf = final_gdf.loc[:, ~final_gdf.columns.duplicated()]
+
+    # Guarantee the output column exists and has no gaps.
+    if soil_prop in final_gdf.columns:
+        final_gdf[soil_prop] = final_gdf[soil_prop].fillna(0)
+    else:
+        final_gdf[soil_prop] = 0
+
+    print(f"正在保存结果至: {output_path}")
+    final_gdf.to_file(output_path, encoding='utf-8')
+    print("处理完成！")
+
+    return final_gdf
+
+if __name__ == "__main__":
+    # One-off run: sample points, soil-type polygons and the output directory.
+    output_folder = r"E:\@三普属性图出图\广西天峨县"
+    shp_file = r"E:\@三普属性图出图\测试\YXTCHD.shp"
+    dltb_file = r"E:\@三普属性图出图\广西天峨县\@基础数据\土壤类型图\土壤类型图.shp"
+    result_path = fr'{output_folder}\YXTCHD.shp'
+
+    # Arguments in order: attribute name, points, polygons, output file.
+    assign_gzchd_flexible_v2("YXTCHD", shp_file, dltb_file, result_path)
+
+    # Short pause; the original author added it to avoid file read/write conflicts.
+    time.sleep(2)
--- a/scripts/其他工具/样点剔除统计表格.py
+++ b/scripts/其他工具/样点剔除统计表格.py
@@ -0,0 +1,404 @@
+import pandas as pd
+import numpy as np
+import os
+import geopandas as gpd
+from openpyxl import Workbook
+from openpyxl.styles import Alignment, Font, Border, Side
+from openpyxl.utils import get_column_letter
+
+# Maps each indicator code to a (Chinese display name, unit) tuple.
+INDICATOR_UNITS = {
+    # Basic indicators
+    'PH': ('pH', '-'),
+    'ECA': ('交换性钙', 'cmol(½Ca²⁺)/kg'),
+    'EMG': ('交换性镁', 'cmol(½Mg²⁺)/kg'),
+    'TN': ('全氮', 'g/kg'),
+    'TP': ('全磷', 'g/kg'),
+    'TK': ('全钾', 'g/kg'),
+    'AS1': ('有效硫', 'mg/kg'),
+    'AB': ('有效硼', 'mg/kg'),
+    'AP': ('有效磷', 'mg/kg'),
+    'AFE': ('有效铁', 'mg/kg'),
+    'ACU': ('有效铜', 'mg/kg'),
+    'AZN': ('有效锌', 'mg/kg'),
+    'AMN': ('有效锰', 'mg/kg'),
+    'OM': ('有机质', 'g/kg'),
+    'GZCHD': ('耕层厚度', 'cm'),
+    'AK': ('速效钾', 'mg/kg'),
+    'CEC': ('阳离子交换量', 'cmol/kg'),
+    # Special indicators - their field is resolved from the file name
+    'FL': ('粉粒', '%'),
+    'NL': ('黏粒', '%'),
+    'SL': ('砂粒', '%'),
+    'TRRZPJZ': ('土壤容重', 'g/cm³'),
+    'TRZD': ('土壤质地', '分类'),
+    # Other indicators that may appear
+    'AMO': ('有效钼', 'mg/kg'),
+    'TSE': ('全硒', 'mg/kg'),
+    'YXTCHD': ('有效土层厚度', 'cm')
+}
+
+# Maps a Shapefile base name (without extension) to the indicator field it holds.
+FILENAME_TO_FIELD = {
+    '粉粒': 'FL',
+    '黏粒': 'NL',
+    '砂粒': 'SL',
+    '表层容重': 'TRRZPJZ',
+    '土壤质地十二级分类': 'TRZD',
+    '双江县YXTCHD': 'YXTCHD'
+}
+
+# Alternative column names accepted for each indicator code: the Chinese name
+# and the code itself, plus extra spellings for pH.
+FIELD_ALIASES = {
+    'PH': ['pH', 'PH', 'ph'],  # accept the common capitalisations of pH
+    'ECA': ['交换性钙', 'ECA'],
+    'EMG': ['交换性镁', 'EMG'],
+    'TN': ['全氮', 'TN'],
+    'TP': ['全磷', 'TP'],
+    'TK': ['全钾', 'TK'],
+    'AS1': ['有效硫', 'AS1'],
+    'AB': ['有效硼', 'AB'],
+    'AP': ['有效磷', 'AP'],
+    'AFE': ['有效铁', 'AFE'],
+    'ACU': ['有效铜', 'ACU'],
+    'AZN': ['有效锌', 'AZN'],
+    'AMN': ['有效锰', 'AMN'],
+    'OM': ['有机质', 'OM'],
+    'GZCHD': ['耕层厚度', 'GZCHD'],
+    'AK': ['速效钾', 'AK'],
+    'CEC': ['阳离子交换量', 'CEC'],
+    'FL': ['粉粒', 'FL'],
+    'NL': ['黏粒', 'NL'],
+    'SL': ['砂粒', 'SL'],
+    'TRRZPJZ': ['土壤容重', 'TRRZPJZ'],
+    'TRZD': ['土壤质地', 'TRZD'],
+    'AMO': ['有效钼', 'AMO'],
+    'TSE': ['全硒', 'TSE'],
+    'YXTCHD': ['有效土层厚度', 'YXTCHD']
+}
+
+
+def find_shapefiles(folder_path):
+    """Return the paths of every ``.shp`` file found under ``folder_path``.
+
+    The directory tree is walked recursively with ``os.walk``; the extension
+    test is case-insensitive.
+    """
+    return [
+        os.path.join(parent, entry)
+        for parent, _subdirs, entries in os.walk(folder_path)
+        for entry in entries
+        if entry.lower().endswith('.shp')
+    ]
+
+
+def read_shapefile_data(shapefile_path):
+    """Read a Shapefile into a GeoDataFrame, trying UTF-8 first and GBK second.
+
+    Args:
+        shapefile_path: Path of the ``.shp`` file.
+
+    Returns:
+        The GeoDataFrame, or ``None`` when both read attempts fail.
+    """
+    try:
+        print(f"  读取Shapefile: {os.path.basename(shapefile_path)}")
+        gdf = gpd.read_file(shapefile_path, encoding='utf-8')
+
+        print(f"    要素数量: {len(gdf)}")
+        print(f"    属性字段: {list(gdf.columns)}")
+
+        return gdf
+    except Exception as e:
+        print(f"  读取Shapefile失败: {e}")
+
+    # Second attempt with GBK. Catch Exception rather than using a bare
+    # except so Ctrl-C / SystemExit still propagate, and report the reason
+    # instead of failing silently.
+    try:
+        gdf = gpd.read_file(shapefile_path, encoding='gbk')
+    except Exception as e:
+        print(f"    使用GBK编码读取仍然失败: {e}")
+        return None
+
+    print("    使用GBK编码成功读取")
+    return gdf
+
+
+def get_indicator_data(gdf, filename):
+    """Select the indicator columns present in a GeoDataFrame.
+
+    Three strategies are tried; each runs only if the previous found nothing:
+
+    1. Look the file's base name up in ``FILENAME_TO_FIELD`` and take that
+       column, or the first of its ``FIELD_ALIASES`` that exists.
+    2. For every code in ``INDICATOR_UNITS``, take the column named after the
+       code, otherwise the first existing alias.
+    3. For codes whose code or Chinese name occurs in the base name, take the
+       column named by the code or by the Chinese name; the first hit wins.
+
+    Args:
+        gdf: GeoDataFrame read from the Shapefile.
+        filename: Path or name of the Shapefile the data came from.
+
+    Returns:
+        Dict mapping indicator code to the matching column (may be empty).
+    """
+    indicator_data = {}
+
+    # Strip only a trailing ".shp", case-insensitively. find_shapefiles also
+    # accepts ".SHP", which str.replace('.shp', '') would leave in place, and
+    # replace() would also remove ".shp" from the middle of a name.
+    basename = os.path.basename(filename)
+    stem, ext = os.path.splitext(basename)
+    if ext.lower() == '.shp':
+        basename = stem
+
+    # 1. File-name mapping first.
+    target_field = FILENAME_TO_FIELD.get(basename)
+    if target_field is not None:
+        if target_field in gdf.columns:
+            indicator_data[target_field] = gdf[target_field]
+            print(f"    通过文件名映射找到字段: {target_field}")
+        elif target_field in INDICATOR_UNITS:
+            # The mapped column is absent: fall back to its known aliases.
+            for alias in FIELD_ALIASES.get(target_field, []):
+                if alias in gdf.columns:
+                    indicator_data[target_field] = gdf[alias]
+                    print(f"    通过文件名映射+别名找到字段: {alias} -> {target_field}")
+                    break
+
+    # 2. Nothing found through the file name: scan every indicator and alias.
+    if not indicator_data:
+        for indicator_code in INDICATOR_UNITS:
+            # Prefer a column named exactly like the code.
+            if indicator_code in gdf.columns:
+                indicator_data[indicator_code] = gdf[indicator_code]
+                print(f"    直接匹配字段: {indicator_code}")
+                continue
+
+            # Otherwise take the first alias that exists.
+            for alias in FIELD_ALIASES.get(indicator_code, []):
+                if alias in gdf.columns:
+                    indicator_data[indicator_code] = gdf[alias]
+                    print(f"    通过别名匹配: {alias} -> {indicator_code}")
+                    break
+
+    # 3. Last resort: keyword in the file name, then a column of that name.
+    if not indicator_data:
+        filename_lower = basename.lower()
+        for indicator_code, (chinese_name, unit) in INDICATOR_UNITS.items():
+            if indicator_code.lower() in filename_lower or chinese_name in filename_lower:
+                if indicator_code in gdf.columns:
+                    indicator_data[indicator_code] = gdf[indicator_code]
+                    print(f"    通过文件名关键词匹配: {indicator_code}")
+                    break
+                elif chinese_name in gdf.columns:
+                    indicator_data[indicator_code] = gdf[chinese_name]
+                    print(f"    通过文件名关键词匹配中文名: {chinese_name} -> {indicator_code}")
+                    break
+
+    return indicator_data
+
+
+def get_combined_stats_from_folder(folder_path, folder_name="数据"):
+    """Pool indicator values from every Shapefile in a folder and summarise them.
+
+    Values of the same indicator are concatenated across files, coerced to
+    numeric, stripped of NaN and negative entries, and reduced to count, min,
+    max, mean, population standard deviation (``ddof=0``) and coefficient of
+    variation in percent.
+
+    Args:
+        folder_path: Folder searched recursively for Shapefiles.
+        folder_name: Accepted for interface compatibility; not used here.
+
+    Returns:
+        A DataFrame with one row per indicator, sorted by the ``指标`` column,
+        or an empty DataFrame when no file or no indicator data is found.
+    """
+    shp_paths = find_shapefiles(folder_path)
+    if not shp_paths:
+        print("  未找到Shapefile文件")
+        return pd.DataFrame()
+
+    print(f"  找到 {len(shp_paths)} 个Shapefile文件")
+
+    # One growing list of raw values per known indicator code.
+    pooled = {code: [] for code in INDICATOR_UNITS}
+
+    for position, shp_path in enumerate(shp_paths, 1):
+        print(f"\n  [{position}] 处理文件: {os.path.basename(shp_path)}")
+        layer = read_shapefile_data(shp_path)
+        if layer is None:
+            continue
+
+        for code, column in get_indicator_data(layer, shp_path).items():
+            if code not in pooled:
+                continue
+            try:
+                # Non-numeric entries become NaN and are dropped.
+                numeric = pd.to_numeric(column, errors='coerce').dropna()
+                if len(numeric) > 0:
+                    pooled[code].extend(numeric.tolist())
+                    print(f"    提取 {code}: {len(numeric)} 个值")
+            except Exception as e:
+                print(f"    处理 {code} 数据时出错: {e}")
+
+    # Per-indicator summary over the pooled values.
+    summary_rows = []
+    for code, (label, unit) in INDICATOR_UNITS.items():
+        values = pooled.get(code, [])
+        if not values:
+            continue
+
+        series = pd.Series(values)
+        # Keep non-negative values (NaN would also pass this mask).
+        keep = (series >= 0) | pd.isna(series)
+        series = series[keep]
+        if len(series) == 0:
+            continue
+
+        # Population standard deviation, not the pandas default sample one.
+        spread = series.std(ddof=0)
+        centre = series.mean()
+
+        # CV is reported as 0 when the mean is effectively zero.
+        if abs(centre) < 1e-8:
+            cv_value = 0.0
+        else:
+            cv_value = round((spread / centre) * 100, 2)
+
+        summary_rows.append({
+            '指标代码': code,
+            '指标': label,
+            '单位': unit,
+            '样点数': int(len(series)),
+            'Min': round(float(series.min()), 2),
+            'Max': round(float(series.max()), 2),
+            'Mean': round(float(centre), 2),
+            'Std': round(float(spread), 2),
+            'CV': cv_value,
+        })
+        print(f"    统计 {label}({code}): {len(series)} 个样点")
+
+    if not summary_rows:
+        print("  未找到任何指标数据")
+        return pd.DataFrame()
+
+    stats_df = pd.DataFrame(summary_rows).sort_values('指标')
+    print(f"\n  总共统计到 {len(stats_df)} 个指标")
+    return stats_df
+
+
+def create_statistics_excel(before_folder, after_folder, output_path):
+    """Write a one-sheet Excel report comparing sample counts before and after culling.
+
+    Statistics (Min/Max/Mean/Std/CV) come from the ``after_folder`` data; each
+    row also carries the sample count found in ``before_folder`` and the
+    number of samples removed (never below 0). Rows with removed samples get
+    the removed count in bold red.
+
+    Args:
+        before_folder: Folder with the Shapefiles before culling.
+        after_folder: Folder with the Shapefiles after culling.
+        output_path: Path of the ``.xlsx`` file to write.
+    """
+    workbook = Workbook()
+
+    # Remove the sheet a new workbook starts with.
+    if 'Sheet' in workbook.sheetnames:
+        workbook.remove(workbook['Sheet'])
+
+    # Shared cell styling.
+    thin_border = Border(
+        left=Side(style='thin'),
+        right=Side(style='thin'),
+        top=Side(style='thin'),
+        bottom=Side(style='thin')
+    )
+
+    banner = "=" * 60
+    print(banner)
+    print("开始分析样点数据")
+    print(banner)
+
+    # Statistics before culling; stays None when unavailable.
+    before_stats = None
+    if not os.path.exists(before_folder):
+        print(f"✗ 剔除前文件夹不存在: {before_folder}")
+    else:
+        print("\n[1] 分析剔除前数据:")
+        print(f"文件夹路径: {before_folder}")
+        before_stats = get_combined_stats_from_folder(before_folder, "剔除前")
+        if before_stats.empty:
+            print("✗ 剔除前未找到指定指标数据")
+            before_stats = None
+        else:
+            print(f"✓ 剔除前统计完成: {len(before_stats)} 个指标")
+
+    # Statistics after culling drive the rows of the report.
+    if not os.path.exists(after_folder):
+        print(f"✗ 剔除后文件夹不存在: {after_folder}")
+        sheet_combined = workbook.create_sheet(title="样点统计")
+        sheet_combined.cell(row=1, column=1, value="剔除后文件夹不存在")
+    else:
+        print("\n[2] 分析剔除后数据:")
+        print(f"文件夹路径: {after_folder}")
+        after_stats = get_combined_stats_from_folder(after_folder, "剔除后")
+
+        if after_stats.empty:
+            print("✗ 剔除后未找到指定指标数据")
+            sheet_combined = workbook.create_sheet(title="样点统计")
+            sheet_combined.cell(row=1, column=1, value="未找到指定指标数据")
+        else:
+            sheet_combined = workbook.create_sheet(title="样点统计")
+
+            # Header row.
+            combined_headers = ['指标', '单位', '剔除前样点数', '剔除样点数', '剔除后样点数', 'Min', 'Max', 'Mean',
+                                'Std', 'CV']
+            for col, header in enumerate(combined_headers, 1):
+                head_cell = sheet_combined.cell(row=1, column=col, value=header)
+                head_cell.alignment = Alignment(horizontal='center', vertical='center')
+                head_cell.font = Font(bold=True)
+                head_cell.border = thin_border
+
+            # Data rows, one per indicator found after culling.
+            matched_total = 0
+            for row_idx, (_, after_row) in enumerate(after_stats.iterrows(), start=2):
+                # Find the "before" row: by indicator code first, then by name.
+                before_row = None
+                if before_stats is not None:
+                    for key in ('指标代码', '指标'):
+                        hits = before_stats[before_stats[key] == after_row[key]]
+                        if not hits.empty:
+                            before_row = hits.iloc[0]
+                            break
+
+                before_sample_count = 0 if before_row is None else int(before_row['样点数'])
+                after_sample_count = int(after_row['样点数'])
+                abnormal_count = max(0, before_sample_count - after_sample_count)
+                if before_sample_count > 0:
+                    matched_total += 1
+
+                # Values in header order.
+                row_values = [
+                    after_row['指标'],
+                    after_row['单位'],
+                    before_sample_count,
+                    abnormal_count,
+                    after_sample_count,
+                    after_row['Min'],
+                    after_row['Max'],
+                    after_row['Mean'],
+                    after_row['Std'],
+                    after_row['CV'],
+                ]
+                for col_idx, value in enumerate(row_values, 1):
+                    cell = sheet_combined.cell(row=row_idx, column=col_idx, value=value)
+                    cell.alignment = Alignment(horizontal='center', vertical='center')
+                    cell.border = thin_border
+                    # Column 4 holds the removed-sample count; flag it when non-zero.
+                    if col_idx == 4 and abnormal_count > 0:
+                        cell.font = Font(bold=True, color="FF0000")
+
+            # Column widths, keyed by header text.
+            combined_column_widths = {
+                '指标': 15,
+                '单位': 12,
+                '剔除前样点数': 12,
+                '剔除样点数': 12,
+                '剔除后样点数': 12,
+                'Min': 10,
+                'Max': 10,
+                'Mean': 10,
+                'Std': 10,
+                'CV': 10
+            }
+            for col_idx, col_name in enumerate(combined_headers, 1):
+                width = combined_column_widths.get(col_name)
+                if width is not None:
+                    sheet_combined.column_dimensions[get_column_letter(col_idx)].width = width
+
+            print(f"\n✓ 融合统计完成: {len(after_stats)} 个指标")
+
+            # Matching summary, only meaningful when "before" data exists.
+            if before_stats is not None:
+                print(f"  剔除前找到 {len(before_stats)} 个指标")
+                print(f"  剔除后找到 {len(after_stats)} 个指标")
+                print(f"  成功匹配 {matched_total} 个指标的剔除前数据")
+
+    # Save the workbook.
+    workbook.save(output_path)
+    print("\n" + banner)
+    print(f"文件保存成功: {output_path}")
+    print(banner)
+
+
+# ================ Usage example ================
+if __name__ == "__main__":
+    # One before/after folder pair and the report it produces.
+    output_path = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县样点统计结果.xlsx"
+    after_folder = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县20260127剔除后"
+    before_folder = r"D:\a陆平\1.5实验数据\12月新版实验室数据\云南省实验室数据成果1218\永仁县20260127"
+
+    create_statistics_excel(
+        before_folder=before_folder,
+        after_folder=after_folder,
+        output_path=output_path,
+    )
--- a/scripts/其他工具/面积加权均值_华南.py
+++ b/scripts/其他工具/面积加权均值_华南.py
--- a/scripts/其他工具/面积加权均值_西南.py
+++ b/scripts/其他工具/面积加权均值_西南.py
@@ -0,0 +1,932 @@
+# 生成完整的exactextract面积加权计算Python脚本
+# -*- coding: utf-8 -*-
+"""
+土壤属性栅格数据面积加权统计脚本
+基于exactextract库实现多属性栅格的面积加权平均值计算
+最终输出格式与土壤属性图斑数据表一致
+"""
+
+import exactextract as ee
+import geopandas as gpd
+import rasterio
+import pandas as pd
+import numpy as np
+from pathlib import Path
+import warnings
+warnings.filterwarnings('ignore')
+
+def init_logger():
+    """
+    Configure logging output and hand back this module's logger.
+
+    :return: the ``logging.Logger`` named after the current module (``__name__``)
+    """
+    import logging
+
+    settings = {
+        'level': logging.INFO,
+        'format': '%(asctime)s - %(levelname)s - %(message)s',
+        'datefmt': '%Y-%m-%d %H:%M:%S',
+    }
+    # basicConfig does nothing once the root logger already has handlers,
+    # so calling this helper from several functions is harmless.
+    logging.basicConfig(**settings)
+
+    module_logger = logging.getLogger(__name__)
+    return module_logger
+
+def validate_data(vector_path, raster_files):
+    """
+    Check that the input vector and raster files are usable.
+
+    Verifies that every file exists, that the vector layer is non-empty and
+    contains only areal geometries (Polygon / MultiPolygon), and that every
+    raster opens and has exactly one band.
+
+    :param vector_path: path to the polygon (map-patch) vector file
+    :param raster_files: dict mapping attribute name -> raster file path
+    :return: True when all checks pass, otherwise False (the reason is logged)
+    """
+    logger = init_logger()
+
+    # The vector file must exist
+    if not Path(vector_path).exists():
+        logger.error(f"矢量文件不存在：{vector_path}")
+        return False
+
+    # At least one raster is required; the later steps take the first one as CRS reference
+    if not raster_files:
+        logger.error("栅格文件读取失败：未提供任何栅格文件")
+        return False
+
+    # Every raster file must exist
+    for attr_name, raster_path in raster_files.items():
+        if not Path(raster_path).exists():
+            logger.error(f"栅格文件不存在：{attr_name} -> {raster_path}")
+            return False
+
+    # The vector layer must be readable and purely areal
+    try:
+        gdf = gpd.read_file(vector_path)
+        # Look at every geometry type present, not only the first one; missing geometries are ignored
+        geom_types = set(gdf.geometry.geom_type.dropna())
+    except Exception as e:
+        logger.error(f"矢量文件读取失败：{str(e)}")
+        return False
+    # MultiPolygon is accepted as well: it is an areal feature type too
+    if not geom_types or not geom_types <= {'Polygon', 'MultiPolygon'}:
+        logger.error("矢量文件必须是Polygon类型（面要素）")
+        return False
+
+    # Every raster (not just the first) must open and be single-band
+    for attr_name, raster_path in raster_files.items():
+        try:
+            with rasterio.open(raster_path) as src:
+                band_count = src.count
+        except Exception as e:
+            logger.error(f"栅格文件读取失败：{str(e)}")
+            return False
+        if band_count != 1:
+            logger.error(f"每个栅格文件必须是单波段（每个属性单独一个栅格）：{attr_name} -> {raster_path}")
+            return False
+
+    logger.info("所有输入数据验证通过")
+    return True
+
+def standardize_crs(gdf, raster_path):
+    """
+    Bring the vector data into the coordinate reference system (CRS) of a raster.
+
+    :param gdf: vector GeoDataFrame
+    :param raster_path: path of any one raster file; its CRS is the target CRS
+    :return: the GeoDataFrame, reprojected when its CRS differs from the raster's
+    :raises ValueError: if the raster or the vector data has no CRS defined
+    """
+    logger = init_logger()
+
+    # Read the target CRS from the raster
+    with rasterio.open(raster_path) as src:
+        raster_crs = src.crs
+
+    vector_crs = gdf.crs
+
+    # Without a CRS on both sides no alignment is possible; fail with a clear
+    # message instead of an AttributeError on None.to_string()
+    if raster_crs is None:
+        raise ValueError(f"栅格文件缺少坐标参考系统（CRS）：{raster_path}")
+    if vector_crs is None:
+        raise ValueError("矢量数据缺少坐标参考系统（CRS），无法与栅格对齐")
+
+    logger.info(f"当前矢量CRS：{vector_crs.to_string()}")
+    logger.info(f"目标栅格CRS：{raster_crs.to_string()}")
+
+    # Reproject only when the two CRS differ
+    if vector_crs != raster_crs:
+        logger.warning("矢量与栅格CRS不一致，正在进行转换...")
+        gdf = gdf.to_crs(raster_crs)
+        logger.info(f"CRS转换完成，新矢量CRS：{gdf.crs.to_string()}")
+
+    return gdf
+
+def calculate_area_weighted_stats(vector_path, raster_files, output_path):
+    """
+    Core routine: compute one mean value per polygon for every attribute raster
+    and export the resulting table to Excel.
+
+    Steps: load the polygons, reproject them to the raster CRS, copy the base
+    attribute fields, compute the polygon area in mu, extract the mean of each
+    raster per polygon with exactextract, add the grade columns, arrange the
+    columns in the final order and write the Excel file.
+
+    :param vector_path: path to the polygon (map-patch) vector file
+    :param raster_files: dict mapping attribute name -> raster path; each name becomes an output column
+    :param output_path: destination Excel (.xlsx) file
+    :return: DataFrame with the exported statistics
+    """
+    logger = init_logger()
+    logger.info("开始面积加权统计计算")
+
+    # 1. Load the vector data
+    logger.info(f"加载矢量数据：{vector_path}")
+    gdf = gpd.read_file(vector_path)
+
+    # 2. Align the vector CRS with the rasters (the first raster is the reference)
+    test_raster = next(iter(raster_files.values()))
+    gdf = standardize_crs(gdf, test_raster)
+
+    # 3. Initialise the result table with the key attribute fields of the polygons
+    logger.info("初始化结果数据结构")
+    # Base fields, aligned with the soil-attribute patch table layout
+    # TODO
+    base_fields = ["FID","DM","XZM","QSDWDM","QSDWMC","TL", "YL", "TS", "TZ", "DLBM", "DLMC"]
+
+    # Start from the polygon index so that scalar fill-ins span every row
+    result_df = pd.DataFrame(index=gdf.index)
+    for field in base_fields:
+        if field in gdf.columns:
+            result_df[field] = gdf[field]
+        else:
+            # Field missing in the vector data: keep the column, leave it empty
+            result_df[field] = np.nan
+            logger.warning(f"矢量中缺少'{field}'字段，将生成空值")
+
+    # 4. Polygon area, converted to mu
+    logger.info("计算图斑面积（单位：亩）")
+    # geometry.area is expressed in squared CRS units, so it is square metres only
+    # for a metre-based projected CRS. The module silences library warnings, hence
+    # the explicit log entry for geographic CRS.
+    if gdf.crs is not None and gdf.crs.is_geographic:
+        logger.warning("当前CRS为地理坐标系，面积单位不是平方米，'面积亩'结果不可靠，请使用投影坐标系")
+    gdf["area_sqm"] = gdf.geometry.area
+    # 1 mu ≈ 666.6667 m², i.e. m² * 0.0015; rounded to 6 decimals like the sample data
+    result_df["面积亩"] = (gdf["area_sqm"] * 0.0015).round(6)
+
+    # 5. Mean of every soil attribute raster per polygon
+    logger.info("开始处理土壤属性栅格（面积加权平均）")
+    total_attrs = len(raster_files)
+    for attr_idx, (attr_name, raster_path) in enumerate(raster_files.items(), 1):
+        logger.info(f"处理进度：{attr_idx}/{total_attrs} - 属性：{attr_name}")
+
+        try:
+            # No explicit weights argument is passed; the weighting comes from
+            # exactextract's "mean" operation itself
+            # (presumably cell coverage fraction — confirm in the exactextract docs).
+            stats = ee.exact_extract(
+                raster_path,
+                gdf,
+                ["mean"],
+                output="pandas"
+            )
+
+            if stats is None:
+                logger.warning(f"{attr_name}计算结果为空，可能无交集区域")
+                result_df[attr_name] = np.nan
+                continue
+
+            # Make sure the result can be indexed by column name
+            if not isinstance(stats, pd.DataFrame):
+                stats = pd.DataFrame(stats)
+
+            # Keep 4 decimals
+            result_df[attr_name] = stats["mean"].round(4)
+
+            # Polygons without any raster coverage come back empty
+            null_count = int(result_df[attr_name].isnull().sum())
+            if null_count > 0:
+                logger.warning(f"{attr_name}存在{null_count}个空值（图斑与栅格无交集）")
+                # Fill with 0 (adjust to the business rules if needed)
+                result_df[attr_name] = result_df[attr_name].fillna(0)
+
+        except Exception as e:
+            logger.error(f"{attr_name}处理失败：{str(e)}")
+            # Keep going with an empty column instead of aborting the whole run
+            result_df[attr_name] = np.nan
+
+    # 6. Add the grade columns
+    logger.info("添加土壤属性分级字段")
+    result_df = add_attribute_classification(result_df)
+
+    # 7. Final column order (aligned with the sample table)
+    logger.info("整理输出字段顺序")
+    # TODO
+    final_columns = [
+        "FID","DM", "XZM", "QSDWDM", "QSDWMC",
+        "TL", "YL", "TS", "TZ", "DLBM", "DLMC",
+        "耕层厚度", "土壤容重", "砂粒", "粉粒", "黏粒", "酸碱度", "阳离子",
+        "有机质", "全氮", "全磷", "全钾", "有效磷", "速效钾", "有效铁", "有效锰",
+        "有效铜", "有效锌", "有效硼", "有效钼", "有效硫", "交换性钙", "交换性镁", "全硒",
+        "有效土层厚度", "土壤质地",
+        "耕层厚度分级", "土壤容重分级", "砂粒分级", "粉粒分级", "黏粒分级", "酸碱度分级", "阳离子分级",
+        # Grade columns follow the value columns one-to-one; "有效磷分级" sits between
+        # "全钾分级" and "速效钾分级" (it used to be a duplicated "有效铁分级").
+        "有机质分级", "全氮分级", "全磷分级", "全钾分级", "有效磷分级", "速效钾分级", "有效铁分级", "有效锰分级",
+        "有效铜分级", "有效锌分级", "有效硼分级", "有效钼分级", "有效硫分级", "交换性钙分级", "交换性镁分级", "全硒分级",
+        "有效土层厚度分级", "土壤质地分级",
+        "面积亩"
+    ]
+
+    # Create any column that is still missing so the selection below cannot fail
+    for col in final_columns:
+        if col not in result_df.columns:
+            result_df[col] = np.nan
+
+    # Arrange the columns in the final order
+    result_df = result_df[final_columns]
+
+    # 8. Export to Excel (openpyxl engine for .xlsx)
+    logger.info(f"导出结果到：{output_path}")
+    result_df.to_excel(output_path, index=False, engine="openpyxl")
+    logger.info(f"结果导出完成，共生成{len(result_df)}条记录")
+
+    return result_df
+
+def add_attribute_classification(df):
+    """
+    Add a grade column ("<attribute>分级") for every soil attribute column present in ``df``.
+
+    Each attribute is graded by a step function over fixed thresholds (the
+    original code labels them as Third National Soil Census standards; adjust
+    the thresholds to the business rules if needed). A bound belongs to the
+    lower interval, i.e. intervals are "greater than lower bound, up to and
+    including upper bound". Empty cells stay empty (NaN).
+
+    :param df: DataFrame with the raw attribute values; it is modified in place
+    :return: the same DataFrame with the grade columns added
+    """
+    # todo
+    def step_grader(lower_bounds, floor_grade):
+        """Build a grader: the first (bound, grade) pair whose bound the value exceeds wins."""
+        def grade(value):
+            for bound, level in lower_bounds:
+                if value > bound:
+                    return level
+            return floor_grade
+        return grade
+
+    def higher_is_better(*cuts):
+        """Cuts in descending order: above cuts[0] -> grade 1, ..., at or below the last cut -> len(cuts) + 1."""
+        return step_grader([(cut, rank) for rank, cut in enumerate(cuts, 1)], len(cuts) + 1)
+
+    def higher_is_higher(*cuts):
+        """Cuts in descending order: above cuts[0] -> grade len(cuts) + 1, ..., at or below the last cut -> 1."""
+        top = len(cuts) + 1
+        return step_grader([(cut, top - pos) for pos, cut in enumerate(cuts)], 1)
+
+    # pH: grade 1 for (6.0, 7.0], getting worse towards both the alkaline and the acid end
+    grade_ph = step_grader(
+        [(8.5, 5), (8.0, 4), (7.5, 3), (7.0, 2), (6.0, 1), (5.5, 2), (5.0, 3), (4.5, 4)],
+        5,
+    )
+
+    # Bulk density (g/cm³): grade 1 for (1.10, 1.25]; note that (1.00, 1.10] is grade 2
+    # and (0.90, 1.00] is grade 4 — there is no grade 3 on the low side
+    grade_bulk_density = step_grader(
+        [(1.55, 5), (1.45, 4), (1.35, 3), (1.25, 2), (1.10, 1), (1.00, 2), (0.90, 4)],
+        5,
+    )
+
+    def grade_texture(code):
+        """Soil texture: round the class code; 2-5 map to themselves, anything else to 1."""
+        rounded = round(code, 0)
+        for level in (5, 4, 3, 2):
+            if rounded == level:
+                return level
+        return 1
+
+    # (source column, grader), in the order the grade columns are appended
+    grading_rules = [
+        ("酸碱度", grade_ph),                                      # pH
+        ("有机质", higher_is_better(35.0, 25.0, 15.0, 10.0)),      # organic matter, g/kg
+        ("阳离子", higher_is_better(30.0, 20.0, 15.0, 10.0)),      # cation exchange capacity, cmol/kg
+        ("有效磷", higher_is_better(40.0, 25.0, 15.0, 5.0)),       # available P, mg/kg
+        ("速效钾", higher_is_better(150, 100, 75, 50)),            # available K, mg/kg
+        ("耕层厚度", higher_is_better(25.0, 20.0, 15.0, 10.0)),    # plough-layer thickness, cm
+        ("土壤容重", grade_bulk_density),                          # bulk density, g/cm³
+        ("全氮", higher_is_better(2.00, 1.50, 1.00, 0.50)),        # total N, g/kg
+        ("全磷", higher_is_better(1.00, 0.80, 0.60, 0.40)),        # total P, g/kg
+        ("全钾", higher_is_better(20.0, 15.0, 10.0, 5.0)),         # total K, g/kg
+        ("有效铁", higher_is_better(20.0, 10.0, 5.0, 3.0)),        # available Fe, mg/kg
+        ("有效锌", higher_is_better(3.00, 1.00, 0.50, 0.20)),      # available Zn, mg/kg
+        ("有效锰", higher_is_better(30.0, 15.0, 5.0, 1.0)),        # available Mn, mg/kg
+        ("有效铜", higher_is_better(2.00, 1.00, 0.50, 0.20)),      # available Cu, mg/kg
+        ("有效硼", higher_is_better(1.00, 0.80, 0.50, 0.20)),      # available B, mg/kg
+        ("有效钼", higher_is_better(0.20, 0.15, 0.10, 0.05)),      # available Mo, mg/kg
+        ("有效硫", higher_is_better(40.0, 30.0, 20.0, 10.0)),      # available S, mg/kg
+        ("交换性钙", higher_is_better(7.49, 4.99, 2.50, 1.00)),    # exchangeable Ca, cmol(½Ca²⁺)/kg
+        ("交换性镁", higher_is_better(1.64, 1.23, 0.82, 0.41)),    # exchangeable Mg, cmol(½Mg²⁺)/kg
+        ("全硒", higher_is_better(3.00, 0.40, 0.17)),              # total Se, mg/kg (four grades only)
+        ("粉粒", higher_is_higher(75, 45, 30, 15)),                # silt, %
+        ("黏粒", higher_is_higher(65, 45, 25, 15)),                # clay, %
+        ("砂粒", higher_is_higher(85, 55, 40, 30)),                # sand, %
+        ("有效土层厚度", higher_is_better(100, 80, 60, 40)),        # effective soil depth, cm
+        ("土壤质地", grade_texture),                               # soil texture class code
+    ]
+
+    def skip_missing(grader):
+        """Wrap a grader so that empty cells yield NaN instead of being graded."""
+        return lambda cell: grader(cell) if pd.notna(cell) else np.nan
+
+    # Grade only the attributes that are actually present
+    for source_col, grader in grading_rules:
+        if source_col not in df.columns:
+            continue
+        df[source_col + "分级"] = df[source_col].apply(skip_missing(grader))
+
+    return df
+
+def main():
+    """
+    Program entry point.
+
+    Holds the user configuration (vector path, raster paths, output path),
+    validates the inputs, runs the statistics and logs a short preview of the
+    result. The configuration section must be edited to match the local data.
+    """
+    logger = init_logger()
+    logger.info("="*50)
+    logger.info("土壤属性栅格面积加权统计程序启动")
+    logger.info("="*50)
+
+    # --------------------------
+    # User configuration (must be edited!)
+    # --------------------------
+    # 1. Polygon (map-patch) vector file, e.g. a Shapefile or GeoPackage
+    # TODO
+    VECTOR_PATH = r"D:\工作\三普成果编制\出图数据\北海\三普栅格\DL_ALL.shp"
+
+    # 2. Soil attribute rasters (key: attribute name, value: raster file path)
+    # NOTE: the attribute names must equal the column names of the final table
+    # TODO
+    RASTER_FILES = {
+        "耕层厚度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\GZCHD.tif",       # plough-layer thickness
+        "土壤容重": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TRRZ.tif",       # bulk density
+        "砂粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\SL.tif",           # sand content
+        "粉粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\FL.tif",           # silt content
+        "黏粒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\NL.tif",           # clay content
+        "酸碱度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\PH.tif",             # pH
+        "阳离子": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\CEC.tif",     # cation exchange capacity
+        "有机质": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\OM.tif",       # organic matter
+        "全氮": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TN.tif",           # total nitrogen
+        "全磷": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TP.tif",           # total phosphorus
+        "全钾": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TK.tif",           # total potassium
+        "有效磷": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AP.tif",       # available phosphorus
+        "速效钾": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AK.tif",        # available potassium
+        # Available iron is disabled here; the "有效铁" columns stay empty in the output.
+        # "有效铁": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AFE.tif",
+        "有效锌": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AZN.tif",    # available zinc
+        "有效锰": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AMN.tif",    # available manganese
+        "有效铜": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\ACU.tif",    # available copper
+        "有效硼": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AB.tif",      # available boron
+        "有效钼": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AMO.tif",    # available molybdenum
+        "有效硫": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\AS1.tif",      # available sulfur
+        "交换性钙": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\ECA.tif", # exchangeable calcium
+        "交换性镁": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\EMG.tif", # exchangeable magnesium
+        "全硒": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TSE.tif",        # total selenium
+        "有效土层厚度": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\YXTCHD.tif", # effective soil depth
+        "土壤质地": r"D:\工作\三普成果编制\出图数据\北海\三普栅格\TRZD.tif",     # soil texture class
+    }
+
+    # 3. Output Excel file (a relative path is resolved against the working directory)
+    OUTPUT_PATH = "土壤属性图斑数据_面积加权结果.xlsx"
+
+    # --------------------------
+    # Execution flow (no changes needed)
+    # --------------------------
+    try:
+        # 1. Validate the inputs
+        if not validate_data(VECTOR_PATH, RASTER_FILES):
+            logger.error("数据验证失败，程序终止")
+            return
+
+        # 2. Run the statistics
+        result_df = calculate_area_weighted_stats(VECTOR_PATH, RASTER_FILES, OUTPUT_PATH)
+
+        # 3. Preview the result ("\n" is a real newline; the former "\\n" logged a literal backslash-n)
+        logger.info("\n结果预览（前3行）：")
+        print(result_df.head(3).to_string(index=False))
+
+        logger.info("\n" + "="*50)
+        logger.info("程序执行完成！")
+        logger.info(f"结果文件：{OUTPUT_PATH}")
+        logger.info("="*50)
+
+    except Exception as e:
+        # Top-level boundary: log the traceback and end the run without re-raising
+        logger.error(f"程序执行出错：{str(e)}", exc_info=True)
+        logger.error("程序异常终止")
+
+if __name__ == "__main__":
+    # Run the workflow only when this file is executed as a script, not on import
+    main()