refactor: 重构项目结构，将geo_tools重命名为app并更新相关引用

- 将主包名从geo_tools改为app - 更新所有模块中的引用路径 - 迁移并更新测试用例 - 添加项目规则文档 - 保持原有功能不变，仅进行结构调整
2026-04-12 19:49:56 +08:00
parent fcb8e1f255
commit db51d41aef
41 changed files with 4132 additions and 808 deletions
--- a/app/analysis/__init__.py
+++ b/app/analysis/__init__.py
@@ -0,0 +1 @@
+"""app.analysis 包 —— 空间分析层。"""
--- a/app/analysis/spatial_ops.py
+++ b/app/analysis/spatial_ops.py
@@ -0,0 +1,149 @@
+"""
+app.analysis.spatial_ops
+~~~~~~~~~~~~~~~~~~~~~~~~
+空间叠加与邻域分析操作。
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import geopandas as gpd
+import pandas as pd
+
+from app.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def buffer_and_overlay(
+    source: gpd.GeoDataFrame,
+    distance: float,
+    target: gpd.GeoDataFrame,
+    how: str = "intersection",
+    projected_crs: str | None = None,
+) -> gpd.GeoDataFrame:
+    """Buffer ``source`` by ``distance`` and overlay the buffers with ``target``.
+
+    Parameters
+    ----------
+    source:
+        Layer whose geometries are buffered.
+    distance:
+        Buffer distance, in the units of the CRS the buffer is computed in.
+    target:
+        Layer the buffers are overlaid with.
+    how:
+        Overlay type, passed to ``geopandas.overlay`` unchanged.
+    projected_crs:
+        CRS both layers are reprojected to for the operation (the result is then
+        returned in the original CRS of ``source``); ``None`` keeps ``source.crs``.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+    """
+    original_crs = source.crs
+    if projected_crs:
+        source = source.to_crs(projected_crs)
+        target = target.to_crs(projected_crs)
+    elif source.crs and target.crs and source.crs != target.crs:
+        # Align CRS before the overlay, as the sibling helpers in this module do.
+        target = target.to_crs(source.crs)  # type: ignore
+
+    buffered = source.copy()
+    # Write to the active geometry column, whatever its name is.
+    buffered[source.geometry.name] = source.geometry.buffer(distance)
+    logger.debug("缓冲区完成（distance=%.2f），执行叠置分析（how=%s）", distance, how)
+    result = gpd.overlay(buffered, target, how=how, keep_geom_type=False)
+    if projected_crs:
+        result = result.to_crs(original_crs)  # type: ignore
+    logger.info("叠置分析完成：%d 条结果", len(result))
+    return result
+
+
+def overlay(
+    df1: gpd.GeoDataFrame,
+    df2: gpd.GeoDataFrame,
+    how: str = "intersection",
+    keep_geom_type: bool = True,
+) -> gpd.GeoDataFrame:
+    """Run ``geopandas.overlay`` after aligning ``df2`` to the CRS of ``df1``.
+
+    Parameters
+    ----------
+    how:
+        Overlay type, forwarded to ``geopandas.overlay``.
+    keep_geom_type:
+        Forwarded to ``geopandas.overlay``.
+    """
+    right = df2 if df1.crs == df2.crs else df2.to_crs(df1.crs)  # type: ignore
+    combined = gpd.overlay(df1, right, how=how, keep_geom_type=keep_geom_type)
+    logger.debug("overlay(%s)：%d 条结果", how, len(combined))
+    return combined
+
+
+def nearest_features(
+    source: gpd.GeoDataFrame,
+    target: gpd.GeoDataFrame,
+    k: int = 1,
+    max_distance: float | None = None,
+) -> gpd.GeoDataFrame:
+    """Join each feature of ``source`` to its nearest feature in ``target``.
+
+    Parameters
+    ----------
+    source:
+        Query layer.
+    target:
+        Layer searched for neighbours; reprojected to ``source.crs`` if needed.
+    k:
+        Requested neighbour count. It is not forwarded to
+        ``geopandas.sjoin_nearest``; values other than 1 only trigger a warning.
+    max_distance:
+        Maximum search distance in CRS units; ``None`` means unlimited.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Left join of ``source`` with the nearest ``target`` attributes plus a
+        ``nearest_distance`` column.
+    """
+    if k != 1:
+        # k is not used by the join below, so say so instead of ignoring it silently.
+        logger.warning("nearest_features 暂不支持 k=%d，仅返回最近的 1 个要素", k)
+    if source.crs != target.crs:
+        target = target.to_crs(source.crs)  # type: ignore
+
+    result = gpd.sjoin_nearest(
+        source, target, how="left", max_distance=max_distance,
+        distance_col="nearest_distance", lsuffix="left", rsuffix="right",
+    )
+    logger.debug("最近邻分析完成（k=%d）：%d 条结果", k, len(result))
+    return result
+
+
+def select_by_location(
+    source: gpd.GeoDataFrame,
+    selector: gpd.GeoDataFrame,
+    predicate: str = "intersects",
+) -> gpd.GeoDataFrame:
+    """Return the features of ``source`` that satisfy ``predicate`` with ``selector``.
+
+    Parameters
+    ----------
+    predicate:
+        Spatial predicate forwarded to ``geopandas.sjoin``.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Copy of the matching rows of ``source``, original index preserved.
+    """
+    selector = selector if source.crs == selector.crs else selector.to_crs(source.crs)  # type: ignore
+    # Join on row positions so duplicated index labels cannot select extra rows.
+    rows = source.reset_index(drop=True)
+    hits = gpd.sjoin(rows, selector, how="inner", predicate=predicate).index
+    result = source.loc[rows.index.isin(hits)].copy()
+    logger.debug("按位置选择（%s）：%d / %d 条", predicate, len(result), len(source))
+    return result
--- a/app/analysis/stats.py
+++ b/app/analysis/stats.py
@@ -0,0 +1,136 @@
+"""
+app.analysis.stats
+~~~~~~~~~~~~~~~~~~
+空间统计工具：属性汇总、面积加权均值、空间自相关指数等。
+"""
+
+from __future__ import annotations
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+
+from app.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def area_weighted_mean(
+    gdf: gpd.GeoDataFrame,
+    value_col: str,
+    group_col: str | None = None,
+    projected_crs: str = "EPSG:3857",
+) -> pd.Series | pd.DataFrame:
+    """Compute the area-weighted mean of an attribute column.
+
+    Rows whose value is missing are left out of both the weighted sum and the
+    weight total, so they do not pull the mean towards zero.
+
+    Parameters
+    ----------
+    gdf:
+        Input GeoDataFrame (polygon features).
+    value_col:
+        Column to average.
+    group_col:
+        Optional grouping column; ``None`` gives one result for the whole frame.
+    projected_crs:
+        CRS used to measure areas when ``gdf`` has no projected CRS.
+
+    Returns
+    -------
+    pd.Series (no grouping) or pd.DataFrame (grouped) holding
+    ``area_weighted_mean`` and ``total_area`` (area of all rows, missing or not).
+    """
+    # Areas are measured in a projected CRS, so reproject when gdf is not in one.
+    needs_projection = not gdf.crs or not gdf.crs.is_projected
+    projected = gdf.to_crs(projected_crs) if needs_projection else gdf
+    values = gdf[value_col]
+    frame = pd.DataFrame({"_area": projected.geometry.area.to_numpy()}, index=gdf.index)
+    frame["_weight"] = frame["_area"].where(values.notna())
+    frame["_weighted"] = values * frame["_weight"]
+
+    if group_col is None:
+        total_area = frame["_area"].sum()
+        mean = frame["_weighted"].sum() / frame["_weight"].sum()
+        return pd.Series({"area_weighted_mean": mean, "total_area": total_area})
+
+    sums = frame.groupby(gdf[group_col]).sum()
+    mean = (sums["_weighted"] / sums["_weight"]).rename("area_weighted_mean")
+    return pd.concat([mean, sums["_area"].rename("total_area")], axis=1).reset_index()
+
+
+def summarize_attributes(
+    gdf: gpd.GeoDataFrame,
+    columns: list[str] | None = None,
+    group_col: str | None = None,
+    agg_funcs: list[str] | None = None,
+) -> pd.DataFrame:
+    """Summarise attribute columns (count, mean, min, max, sum, std, ...).
+
+    Parameters
+    ----------
+    gdf:
+        Input GeoDataFrame.
+    columns:
+        Columns to summarise; ``None`` selects every numeric column.
+    group_col:
+        Grouping column (never summarised itself); ``None`` means no grouping.
+    agg_funcs:
+        Aggregations; default ``["count", "mean", "min", "max", "sum", "std"]``.
+
+    Returns
+    -------
+    pd.DataFrame
+        One row per column (no grouping) or one row per group (grouped).
+
+    Raises
+    ------
+    ValueError
+        If no column is left to summarise.
+    """
+    if agg_funcs is None:
+        agg_funcs = ["count", "mean", "min", "max", "sum", "std"]
+    df = gdf.drop(columns=["geometry"], errors="ignore")
+    if columns is None:
+        columns = df.select_dtypes(include="number").columns.tolist()
+    # Aggregating the grouping key too would duplicate it and break the groupby.
+    columns = [name for name in columns if name != group_col]
+    if not columns:
+        raise ValueError("未找到数值列，请显式指定 columns 参数。")
+    if group_col is None:
+        return df[columns].agg(agg_funcs).T.rename_axis("column").reset_index()  # type: ignore[no-untyped-call]
+    return df.groupby(group_col)[columns].agg(agg_funcs).reset_index()
+
+
+def count_by_polygon(
+    points: gpd.GeoDataFrame,
+    polygons: gpd.GeoDataFrame,
+    count_col: str = "point_count",
+) -> gpd.GeoDataFrame:
+    """Count the points lying within each polygon.
+
+    Parameters
+    ----------
+    points:
+        Point layer; reprojected to ``polygons.crs`` when the CRS differ.
+    polygons:
+        Polygon layer.
+    count_col:
+        Name of the count column written to the result.
+
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Copy of ``polygons`` with an integer ``count_col`` column (0 if empty).
+    """
+    if points.crs != polygons.crs:
+        points = points.to_crs(polygons.crs)  # type: ignore
+    # Join against a geometry-only, positionally indexed copy: the join column
+    # is then always "index_right" and holds row positions, so named, duplicated
+    # or multi-level polygon indexes cannot break the count.
+    shapes = polygons[[polygons.geometry.name]].reset_index(drop=True)
+    hits = gpd.sjoin(points, shapes, how="inner", predicate="within")["index_right"]
+    result = polygons.copy()
+    result[count_col] = np.bincount(hits.to_numpy(dtype=np.intp), minlength=len(result)).astype(int)
+    return result
				`@@ -0,0 +1 @@`
				`"""geo_tools.analysis 包 —— 空间分析层。"""`