137 lines
3.9 KiB
Python
137 lines
3.9 KiB
Python
"""
|
||
geo_tools.analysis.stats
|
||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
空间统计工具:属性汇总、面积加权均值、空间自相关指数等。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import geopandas as gpd
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
from app.utils.logger import get_logger
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
def area_weighted_mean(
|
||
gdf: gpd.GeoDataFrame,
|
||
value_col: str,
|
||
group_col: str | None = None,
|
||
projected_crs: str = "EPSG:3857",
|
||
) -> pd.Series | pd.DataFrame:
|
||
"""计算面积加权均值。
|
||
|
||
Parameters
|
||
----------
|
||
gdf:
|
||
输入 GeoDataFrame(面要素)。
|
||
value_col:
|
||
需要加权平均的属性列名。
|
||
group_col:
|
||
分组字段名;若为 ``None`` 则对整个 GDF 计算单一结果。
|
||
projected_crs:
|
||
用于计算面积的平面投影 CRS。
|
||
|
||
Returns
|
||
-------
|
||
pd.Series(无分组)或 pd.DataFrame(有分组)
|
||
"""
|
||
gdf = gdf.copy()
|
||
|
||
# 计算面积
|
||
if not gdf.crs or not gdf.crs.is_projected:
|
||
projected = gdf.to_crs(projected_crs)
|
||
else:
|
||
projected = gdf
|
||
gdf["_area"] = projected.geometry.area
|
||
|
||
if group_col is None:
|
||
total_area = gdf["_area"].sum()
|
||
result = (gdf[value_col] * gdf["_area"]).sum() / total_area
|
||
return pd.Series({"area_weighted_mean": result, "total_area": total_area})
|
||
|
||
def _weighted(group: pd.DataFrame) -> float:
|
||
return float((group[value_col] * group["_area"]).sum() / group["_area"].sum())
|
||
|
||
result = gdf.groupby(group_col).apply(_weighted, include_groups=False).rename("area_weighted_mean") # type: ignore[no-untyped-call]
|
||
area_sum = gdf.groupby(group_col)["_area"].sum().rename("total_area")
|
||
return pd.concat([result, area_sum], axis=1).reset_index()
|
||
|
||
|
||
def summarize_attributes(
|
||
gdf: gpd.GeoDataFrame,
|
||
columns: list[str] | None = None,
|
||
group_col: str | None = None,
|
||
agg_funcs: list[str] | None = None,
|
||
) -> pd.DataFrame:
|
||
"""对属性列进行统计汇总(最大、最小、均值、总和等)。
|
||
|
||
Parameters
|
||
----------
|
||
gdf:
|
||
输入 GeoDataFrame。
|
||
columns:
|
||
统计的列名列表;``None`` 则自动选取所有数值列。
|
||
group_col:
|
||
分组字段名;``None`` 则对全局统计。
|
||
agg_funcs:
|
||
聚合函数列表,默认 ``["count", "mean", "min", "max", "sum", "std"]``。
|
||
|
||
Returns
|
||
-------
|
||
pd.DataFrame
|
||
"""
|
||
if agg_funcs is None:
|
||
agg_funcs = ["count", "mean", "min", "max", "sum", "std"]
|
||
|
||
df = gdf.drop(columns=["geometry"], errors="ignore")
|
||
|
||
if columns is None:
|
||
columns = df.select_dtypes(include="number").columns.tolist()
|
||
|
||
if not columns:
|
||
raise ValueError("未找到数值列,请显式指定 columns 参数。")
|
||
|
||
subset = df[columns]
|
||
|
||
if group_col is None:
|
||
return subset.agg(agg_funcs).T.rename_axis("column").reset_index() # type: ignore[no-untyped-call]
|
||
|
||
df_with_group = df[[group_col] + columns]
|
||
return df_with_group.groupby(group_col)[columns].agg(agg_funcs).reset_index()
|
||
|
||
|
||
def count_by_polygon(
|
||
points: gpd.GeoDataFrame,
|
||
polygons: gpd.GeoDataFrame,
|
||
count_col: str = "point_count",
|
||
) -> gpd.GeoDataFrame:
|
||
"""统计每个面要素内的点要素数量(类似 ArcGIS「面要素统计点」)。
|
||
|
||
Parameters
|
||
----------
|
||
points:
|
||
点图层。
|
||
polygons:
|
||
面图层。
|
||
count_col:
|
||
新增计数列名。
|
||
|
||
Returns
|
||
-------
|
||
gpd.GeoDataFrame
|
||
含 ``count_col`` 列的 polygons 副本。
|
||
"""
|
||
if points.crs != polygons.crs:
|
||
points = points.to_crs(polygons.crs) # type: ignore
|
||
|
||
joined = gpd.sjoin(points, polygons, how="inner", predicate="within")
|
||
point_counts = joined.groupby("index_right").size().rename(count_col)
|
||
|
||
result = polygons.copy()
|
||
result = result.join(point_counts)
|
||
result[count_col] = result[count_col].fillna(0).astype(int)
|
||
return result
|