refactor: 重构项目结构,将geo_tools重命名为app并更新相关引用
- 将主包名从geo_tools改为app - 更新所有模块中的引用路径 - 迁移并更新测试用例 - 添加项目规则文档 - 保持原有功能不变,仅进行结构调整
This commit is contained in:
1
app/analysis/__init__.py
Normal file
1
app/analysis/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""geo_tools.analysis 包 —— 空间分析层。"""
|
||||
149
app/analysis/spatial_ops.py
Normal file
149
app/analysis/spatial_ops.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
app.analysis.spatial_ops
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
空间叠加与邻域分析操作。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def buffer_and_overlay(
    source: gpd.GeoDataFrame,
    distance: float,
    target: gpd.GeoDataFrame,
    how: str = "intersection",
    projected_crs: str | None = None,
) -> gpd.GeoDataFrame:
    """Buffer ``source`` and overlay the buffers with ``target``.

    Parameters
    ----------
    source:
        Layer whose geometries are buffered.
    distance:
        Buffer distance, expressed in the units of the CRS the buffer is
        computed in (``projected_crs`` when given, otherwise the CRS of
        ``source``).
    target:
        Layer the buffers are overlaid with.
    how:
        Overlay type: ``"intersection"``, ``"union"``, ``"difference"``,
        ``"symmetric_difference"`` or ``"identity"``.
    projected_crs:
        CRS both layers are reprojected to before buffering (a planar CRS is
        recommended so that ``distance`` is meaningful). When ``None`` (or
        empty) the buffer is computed in the current CRS of ``source``; in a
        geographic CRS ``distance`` is then interpreted in degrees.

    Returns
    -------
    gpd.GeoDataFrame
        Overlay result. When ``projected_crs`` was used, the result is
        reprojected back to the original CRS of ``source``.
    """
    original_crs = source.crs

    if projected_crs:
        source = source.to_crs(projected_crs)
        target = target.to_crs(projected_crs)
    elif (
        source.crs is not None
        and target.crs is not None
        and source.crs != target.crs
    ):
        # Same policy as ``overlay`` in this module: bring the second layer
        # into the CRS of the first instead of overlaying mismatched
        # coordinates. Layers without a CRS are passed through untouched.
        target = target.to_crs(source.crs)

    buffered = source.copy()
    # Write into the *active* geometry column; it is not necessarily named
    # "geometry", and assigning to a hard-coded name would leave the active
    # geometry unbuffered.
    buffered[buffered.geometry.name] = buffered.geometry.buffer(distance)
    logger.debug("缓冲区完成(distance=%.2f),执行叠置分析(how=%s)", distance, how)

    result = gpd.overlay(buffered, target, how=how, keep_geom_type=False)

    if projected_crs:
        result = result.to_crs(original_crs)  # type: ignore

    logger.info("叠置分析完成:%d 条结果", len(result))
    return result
|
||||
|
||||
|
||||
def overlay(
    df1: gpd.GeoDataFrame,
    df2: gpd.GeoDataFrame,
    how: str = "intersection",
    keep_geom_type: bool = True,
) -> gpd.GeoDataFrame:
    """Run ``geopandas.overlay`` after aligning the CRS of both layers.

    Parameters
    ----------
    df1:
        First layer; its CRS is the one the overlay is computed in.
    df2:
        Second layer; reprojected to the CRS of ``df1`` when the two differ.
    how:
        Overlay type: ``"intersection"``, ``"union"``, ``"difference"``,
        ``"symmetric_difference"`` or ``"identity"``.
    keep_geom_type:
        Forwarded unchanged to ``geopandas.overlay``.

    Returns
    -------
    gpd.GeoDataFrame
        The overlay result.
    """
    needs_reprojection = df1.crs != df2.crs
    right = df2.to_crs(df1.crs) if needs_reprojection else df2  # type: ignore

    overlaid = gpd.overlay(df1, right, how=how, keep_geom_type=keep_geom_type)
    logger.debug("overlay(%s):%d 条结果", how, len(overlaid))
    return overlaid
|
||||
|
||||
|
||||
def _k_nearest_join(
    source: gpd.GeoDataFrame,
    target: gpd.GeoDataFrame,
    k: int,
    max_distance: float | None,
) -> gpd.GeoDataFrame:
    """Left-join each source feature to its ``k`` closest target features by brute force."""
    target_geoms = target.geometry.reset_index(drop=True)

    left_pos: list[int] = []
    right_pos: list[int] = []
    distances: list[float] = []
    for pos, geom in enumerate(source.geometry):
        nearest = None
        if geom is not None and not geom.is_empty:
            # Missing/empty target geometries yield NaN distances; drop them.
            dist = target_geoms.distance(geom).dropna()
            if max_distance is not None:
                dist = dist[dist <= max_distance]
            nearest = dist.nsmallest(k)
        if nearest is None or nearest.empty:
            # Left-join semantics: keep the source row with empty right side.
            left_pos.append(pos)
            right_pos.append(-1)
            distances.append(float("nan"))
        else:
            left_pos.extend([pos] * len(nearest))
            right_pos.extend(nearest.index.tolist())
            distances.extend(nearest.tolist())

    result = source.iloc[left_pos].copy()
    right_attrs = pd.DataFrame(
        target.drop(columns=[target.geometry.name])
    ).reset_index(drop=True)

    # Disambiguate attribute names present on both sides with the same
    # suffixes the k == 1 path passes to sjoin_nearest. The active geometry
    # column of ``source`` is never renamed.
    overlap = set(result.columns) & set(right_attrs.columns)
    geom_col = source.geometry.name
    result = result.rename(
        columns={col: f"{col}_left" for col in overlap if col != geom_col}
    )
    right_attrs = right_attrs.rename(
        columns={col: f"{col}_right" for col in overlap}
    )

    # Position -1 is not a valid label, so reindex yields an all-NaN row for
    # source features without any neighbour.
    right_rows = right_attrs.reindex(right_pos)
    result["index_right"] = pd.Series(target.index).reindex(right_pos).to_numpy()
    for col in right_rows.columns:
        result[col] = right_rows[col].to_numpy()
    result["nearest_distance"] = distances
    return result


def nearest_features(
    source: gpd.GeoDataFrame,
    target: gpd.GeoDataFrame,
    k: int = 1,
    max_distance: float | None = None,
) -> gpd.GeoDataFrame:
    """Find, for every feature in ``source``, the ``k`` nearest features in ``target``.

    Parameters
    ----------
    source:
        Query layer.
    target:
        Layer that is searched; reprojected to the CRS of ``source`` when the
        two differ.
    k:
        Number of nearest neighbours to return per source feature. ``k == 1``
        delegates to ``geopandas.sjoin_nearest``. ``k > 1`` compares every
        source feature against all target geometries, so its cost grows with
        ``len(source) * len(target)``.
    max_distance:
        Maximum search distance in CRS units; ``None`` means unlimited.

    Returns
    -------
    gpd.GeoDataFrame
        ``source`` left-joined with the attributes of its nearest target
        features. A source feature appears once per neighbour found, so rows
        may repeat; the distance is stored in ``nearest_distance`` and the
        matched target index in ``index_right``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k 必须为不小于 1 的整数,实际为 {k!r}")

    if source.crs != target.crs:
        target = target.to_crs(source.crs)  # type: ignore

    if k == 1:
        result = gpd.sjoin_nearest(
            source,
            target,
            how="left",
            max_distance=max_distance,
            distance_col="nearest_distance",
            lsuffix="left",
            rsuffix="right",
        )
    else:
        # sjoin_nearest only ever returns the single closest match, so the
        # requested k was previously ignored for k > 1.
        result = _k_nearest_join(source, target, k, max_distance)

    logger.debug("最近邻分析完成(k=%d):%d 条结果", k, len(result))
    return result
|
||||
|
||||
|
||||
def select_by_location(
    source: gpd.GeoDataFrame,
    selector: gpd.GeoDataFrame,
    predicate: str = "intersects",
) -> gpd.GeoDataFrame:
    """Select the features of ``source`` by their spatial relation to ``selector``.

    Comparable to ArcGIS "Select By Location".

    Parameters
    ----------
    source:
        Layer the features are selected from.
    selector:
        Layer providing the selecting geometries; reprojected to the CRS of
        ``source`` when the two differ.
    predicate:
        Spatial predicate, e.g. ``"intersects"``, ``"within"``,
        ``"contains"`` or ``"touches"``.

    Returns
    -------
    gpd.GeoDataFrame
        Copy of the rows of ``source`` that satisfy ``predicate`` against at
        least one selector feature, in their original order.
    """
    if source.crs != selector.crs:
        selector = selector.to_crs(source.crs)  # type: ignore

    # Join on geometry only and on row *positions*:
    # - attribute columns are irrelevant for the selection and can make
    #   sjoin fail on reserved/colliding column names;
    # - a label-based ``index.isin`` would also pick non-matching rows when
    #   the source index contains duplicate labels.
    left = source[[source.geometry.name]].reset_index(drop=True)
    right = selector[[selector.geometry.name]]
    joined = gpd.sjoin(left, right, how="inner", predicate=predicate)

    matched = pd.RangeIndex(len(source)).isin(joined.index)
    result = source.loc[matched].copy()
    logger.debug("按位置选择(%s):%d / %d 条", predicate, len(result), len(source))
    return result
|
||||
136
app/analysis/stats.py
Normal file
136
app/analysis/stats.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
app.analysis.stats
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
空间统计工具:属性汇总、面积加权均值、空间自相关指数等。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def area_weighted_mean(
    gdf: gpd.GeoDataFrame,
    value_col: str,
    group_col: str | None = None,
    projected_crs: str = "EPSG:3857",
) -> pd.Series | pd.DataFrame:
    """Compute the area-weighted mean of an attribute.

    Parameters
    ----------
    gdf:
        Input GeoDataFrame (polygon features). It is not modified.
    value_col:
        Name of the attribute column to average.
    group_col:
        Grouping column; ``None`` computes one result for the whole frame.
    projected_crs:
        Planar CRS used to measure areas when ``gdf`` is in a geographic CRS.
        An equal-area CRS is preferable; the default Web Mercator distorts
        areas with latitude.

    Returns
    -------
    pd.Series or pd.DataFrame
        Without grouping: a Series with ``area_weighted_mean`` and
        ``total_area``. With grouping: a DataFrame with the columns
        ``group_col``, ``area_weighted_mean`` and ``total_area``.

    Notes
    -----
    Features whose value is missing are excluded from both the numerator and
    the weight sum of the mean, while ``total_area`` always covers every
    feature. A zero weight sum yields ``NaN``.
    """
    if gdf.crs is None:
        # Geometries without a CRS cannot be reprojected (``to_crs`` raises),
        # so measure them as they are instead of failing.
        logger.warning("输入数据未设置 CRS,无法重投影,直接按原始坐标计算面积。")
        areas = gdf.geometry.area
    elif gdf.crs.is_projected:
        areas = gdf.geometry.area
    else:
        areas = gdf.to_crs(projected_crs).geometry.area

    table = pd.DataFrame({"_value": gdf[value_col], "_area": areas})
    table["_weighted"] = table["_value"] * table["_area"]
    # Only features that actually carry a value contribute weight; otherwise
    # missing values would silently pull the mean towards zero.
    table["_weight"] = table["_area"].where(table["_value"].notna())

    if group_col is None:
        total_area = table["_area"].sum()
        mean = table["_weighted"].sum() / table["_weight"].sum()
        return pd.Series({"area_weighted_mean": mean, "total_area": total_area})

    sums = table.groupby(gdf[group_col])[["_weighted", "_weight", "_area"]].sum()
    summary = pd.DataFrame(
        {
            "area_weighted_mean": sums["_weighted"] / sums["_weight"],
            "total_area": sums["_area"],
        }
    )
    return summary.rename_axis(group_col).reset_index()
|
||||
|
||||
|
||||
def summarize_attributes(
    gdf: gpd.GeoDataFrame,
    columns: list[str] | None = None,
    group_col: str | None = None,
    agg_funcs: list[str] | None = None,
) -> pd.DataFrame:
    """Summarise attribute columns (count, mean, min, max, sum, ...).

    Parameters
    ----------
    gdf:
        Input GeoDataFrame; a column named ``geometry`` is ignored.
    columns:
        Columns to summarise; ``None`` selects every numeric column.
    group_col:
        Grouping column; ``None`` summarises the whole frame. The grouping
        column itself is never summarised.
    agg_funcs:
        Aggregation function names, default
        ``["count", "mean", "min", "max", "sum", "std"]``.

    Returns
    -------
    pd.DataFrame
        Without grouping: one row per summarised column (named in the
        ``column`` column) and one column per aggregation. With grouping: one
        row per group, with ``(column, aggregation)`` multi-level columns.

    Raises
    ------
    ValueError
        If there is no column left to summarise.
    """
    if agg_funcs is None:
        agg_funcs = ["count", "mean", "min", "max", "sum", "std"]

    df = gdf.drop(columns=["geometry"], errors="ignore")

    if columns is None:
        columns = df.select_dtypes(include="number").columns.tolist()

    if group_col is not None:
        # A numeric grouping column is picked up by the automatic selection
        # (or may be listed explicitly); aggregating it alongside grouping by
        # it duplicates the column and makes ``groupby`` fail.
        columns = [col for col in columns if col != group_col]

    if not columns:
        raise ValueError("未找到数值列,请显式指定 columns 参数。")

    if group_col is None:
        return df[columns].agg(agg_funcs).T.rename_axis("column").reset_index()  # type: ignore[no-untyped-call]

    return df.groupby(group_col)[columns].agg(agg_funcs).reset_index()
|
||||
|
||||
|
||||
def count_by_polygon(
    points: gpd.GeoDataFrame,
    polygons: gpd.GeoDataFrame,
    count_col: str = "point_count",
) -> gpd.GeoDataFrame:
    """Count the point features that fall within each polygon feature.

    Parameters
    ----------
    points:
        Point layer; reprojected to the CRS of ``polygons`` when the two
        differ.
    polygons:
        Polygon layer.
    count_col:
        Name of the count column written to the result. An existing column
        with that name is overwritten.

    Returns
    -------
    gpd.GeoDataFrame
        Copy of ``polygons`` with an integer ``count_col`` column; polygons
        without any point get ``0``.
    """
    if points.crs != polygons.crs:
        points = points.to_crs(polygons.crs)  # type: ignore

    # Join geometry-only frames and identify polygons by row position. This
    # keeps the join independent of attribute column names and of the polygon
    # index (named, duplicated or multi-level), so the right-hand key is
    # always the plain ``index_right`` column.
    point_geoms = points[[points.geometry.name]]
    polygon_geoms = polygons[[polygons.geometry.name]].reset_index(drop=True)
    joined = gpd.sjoin(point_geoms, polygon_geoms, how="inner", predicate="within")

    per_polygon = joined["index_right"].value_counts()
    counts = per_polygon.reindex(pd.RangeIndex(len(polygons)), fill_value=0)

    result = polygons.copy()
    result[count_col] = counts.to_numpy().astype(int)
    return result
|
||||
Reference in New Issue
Block a user