Files
geo_tools/app/analysis/stats.py
missum db51d41aef refactor: 重构项目结构,将geo_tools重命名为app并更新相关引用
- 将主包名从geo_tools改为app
- 更新所有模块中的引用路径
- 迁移并更新测试用例
- 添加项目规则文档
- 保持原有功能不变,仅进行结构调整
2026-04-12 19:49:56 +08:00

137 lines
3.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
geo_tools.analysis.stats
~~~~~~~~~~~~~~~~~~~~~~~~~
空间统计工具:属性汇总、面积加权均值、空间自相关指数等。
"""
from __future__ import annotations
import geopandas as gpd
import numpy as np
import pandas as pd
from app.utils.logger import get_logger
logger = get_logger(__name__)
def area_weighted_mean(
gdf: gpd.GeoDataFrame,
value_col: str,
group_col: str | None = None,
projected_crs: str = "EPSG:3857",
) -> pd.Series | pd.DataFrame:
"""计算面积加权均值。
Parameters
----------
gdf:
输入 GeoDataFrame面要素
value_col:
需要加权平均的属性列名。
group_col:
分组字段名;若为 ``None`` 则对整个 GDF 计算单一结果。
projected_crs:
用于计算面积的平面投影 CRS。
Returns
-------
pd.Series无分组或 pd.DataFrame有分组
"""
gdf = gdf.copy()
# 计算面积
if not gdf.crs or not gdf.crs.is_projected:
projected = gdf.to_crs(projected_crs)
else:
projected = gdf
gdf["_area"] = projected.geometry.area
if group_col is None:
total_area = gdf["_area"].sum()
result = (gdf[value_col] * gdf["_area"]).sum() / total_area
return pd.Series({"area_weighted_mean": result, "total_area": total_area})
def _weighted(group: pd.DataFrame) -> float:
return float((group[value_col] * group["_area"]).sum() / group["_area"].sum())
result = gdf.groupby(group_col).apply(_weighted, include_groups=False).rename("area_weighted_mean") # type: ignore[no-untyped-call]
area_sum = gdf.groupby(group_col)["_area"].sum().rename("total_area")
return pd.concat([result, area_sum], axis=1).reset_index()
def summarize_attributes(
gdf: gpd.GeoDataFrame,
columns: list[str] | None = None,
group_col: str | None = None,
agg_funcs: list[str] | None = None,
) -> pd.DataFrame:
"""对属性列进行统计汇总(最大、最小、均值、总和等)。
Parameters
----------
gdf:
输入 GeoDataFrame。
columns:
统计的列名列表;``None`` 则自动选取所有数值列。
group_col:
分组字段名;``None`` 则对全局统计。
agg_funcs:
聚合函数列表,默认 ``["count", "mean", "min", "max", "sum", "std"]``。
Returns
-------
pd.DataFrame
"""
if agg_funcs is None:
agg_funcs = ["count", "mean", "min", "max", "sum", "std"]
df = gdf.drop(columns=["geometry"], errors="ignore")
if columns is None:
columns = df.select_dtypes(include="number").columns.tolist()
if not columns:
raise ValueError("未找到数值列,请显式指定 columns 参数。")
subset = df[columns]
if group_col is None:
return subset.agg(agg_funcs).T.rename_axis("column").reset_index() # type: ignore[no-untyped-call]
df_with_group = df[[group_col] + columns]
return df_with_group.groupby(group_col)[columns].agg(agg_funcs).reset_index()
def count_by_polygon(
points: gpd.GeoDataFrame,
polygons: gpd.GeoDataFrame,
count_col: str = "point_count",
) -> gpd.GeoDataFrame:
"""统计每个面要素内的点要素数量(类似 ArcGIS「面要素统计点」
Parameters
----------
points:
点图层。
polygons:
面图层。
count_col:
新增计数列名。
Returns
-------
gpd.GeoDataFrame
含 ``count_col`` 列的 polygons 副本。
"""
if points.crs != polygons.crs:
points = points.to_crs(polygons.crs) # type: ignore
joined = gpd.sjoin(points, polygons, how="inner", predicate="within")
point_counts = joined.groupby("index_right").size().rename(count_col)
result = polygons.copy()
result = result.join(point_counts)
result[count_col] = result[count_col].fillna(0).astype(int)
return result