refactor(io): 移除分块读取功能并简化矢量数据读取逻辑

移除 read_vector 函数中的 chunk_size 参数及相关分块读取逻辑，简化代码结构；保留 rows 参数用于数据预览功能，并更新相关文档说明。
2026-04-21 20:50:50 +08:00
parent db51d41aef
commit 2bcdcad797
4 changed files with 32 additions and 79 deletions
--- a/app/io/readers.py
+++ b/app/io/readers.py
@@ -35,7 +35,6 @@ def read_vector(
    layer: str | int | None = None,
    crs: str | int | None = None,
    encoding: str = "utf-8",
-    chunk_size: int | None = None,
    rows: int | None = None,
    **kwargs: Any,
 ):
@@ -51,9 +50,6 @@ def read_vector(
        读取后强制重投影到目标 CRS（不传则保留原始 CRS）。
    encoding:
        属性表编码，Shapefile 中文路径常需指定 ``"gbk"``。
-    chunk_size:
-        分块大小，默认 None（一次性读取全部数据）。
-        【警告】：若不设置 chunk_size，大文件可能会占用大量内存。
    rows:
        限制读取的行数，默认 None（读取全部数据）。
        用于快速预览数据，避免读取大文件的全部内容。
@@ -62,22 +58,15 @@ def read_vector(

    Returns
    -------
-    gpd.GeoDataFrame 或生成器
-        如果 chunk_size 为 None，返回完整的 GeoDataFrame；
-        如果设置了 chunk_size，返回一个生成器，每次 yield 一个 GeoDataFrame 块。
+    gpd.GeoDataFrame
+        读取的矢量数据。

    示例
    -----
-    # 全量读取（老方法）
+    ### 全量读取
    gdf = read_vector("data.shp")
    
-    # 分块读取（新方法）
-    for chunk in read_vector("large_data.shp", chunk_size=10000):
-        # 处理每个数据块
-        print(f"处理了 {len(chunk)} 条数据")
-        # 在这里做你的操作，比如计算、过滤等
-    
-    # 只读取前 5 行数据（预览模式）
+    ### 只读取前 5 行数据（预览模式）
    gdf_preview = read_vector("large_data.shp", rows=5)
    print(gdf_preview.head())
    """
@@ -88,70 +77,27 @@ def read_vector(
    logger.info("读取矢量数据：%s（格式：%s，图层：%s）", path, suffix or "目录", layer)

    if suffix == ".csv":
-        # CSV 文件暂时不支持分块读取
-        if chunk_size is not None:
-            logger.warning("CSV 文件暂不支持分块读取，将一次性读取全部数据")
        return _read_csv_vector(path, crs=crs, **kwargs)

-    # fiona / geopandas 通用读取
    read_kwargs: dict[str, Any] = {"encoding": encoding, **kwargs}
    if layer is not None:
        read_kwargs["layer"] = layer

-    # 分块读取模式
-    if chunk_size is not None:
-        def _chunk_generator():
-            logger.info("启用分块读取模式，每块 %d 条数据", chunk_size)
-            try:
-                # 使用 fiona 打开文件
-                with fiona.open(str(path), **read_kwargs) as src:
-                    # 获取坐标系信息
-                    crs_info = src.crs
-                    # 分块读取
-                    features = []
-                    for i, feature in enumerate(src):
-                        # 检查是否达到行数限制
-                        if rows is not None and i >= rows:
-                            break
-                        
-                        features.append(feature)
-                        if (i + 1) % chunk_size == 0:
-                            # 创建 GeoDataFrame 并设置 CRS
-                            gdf = gpd.GeoDataFrame.from_features(features, crs=crs_info)
-                            # 重投影
-                            if crs is not None:
-                                gdf = gdf.to_crs(crs)   # type: ignore
-                            logger.debug("读取并处理第 %d 块数据，共 %d 条", (i + 1) // chunk_size, len(gdf))
-                            yield gdf
-                            features = []
-                    # 处理最后一块
-                    if features:
-                        gdf = gpd.GeoDataFrame.from_features(features, crs=crs_info)
-                        if crs is not None:
-                            gdf = gdf.to_crs(crs)   # type: ignore
-                        logger.debug("读取并处理最后一块数据，共 %d 条", len(gdf))
-                        yield gdf
-            except Exception as exc:
-                raise RuntimeError(f"无法分块读取矢量数据：{exc}") from None
-        return _chunk_generator()
-    else:
-        # 一次性读取模式
-        try:
-            # 添加 rows 参数到读取参数中
-            if rows is not None:
-                read_kwargs["rows"] = rows
-                logger.info("限制读取行数：%d", rows)
-            
-            gdf = gpd.read_file(str(path), **read_kwargs)
-        except Exception as exc:
-            raise RuntimeError(f"无法读取矢量数据：{exc}") from None
+    try:
+        if rows is not None:
+            read_kwargs["rows"] = rows
+            logger.info("限制读取行数：%d", rows)

-        if crs is not None:
-            logger.debug("重投影到 %s", crs)
-            gdf = gdf.to_crs(crs)   # type: ignore
+        gdf = gpd.read_file(str(path), **read_kwargs)
+    except Exception as exc:
+        raise RuntimeError(f"无法读取矢量数据：{exc}") from None

-        logger.info("读取完成：共 %d 条要素，CRS=%s", len(gdf), gdf.crs)
-        return gdf # type: ignore
+    if crs is not None:
+        logger.debug("重投影到 %s", crs)
+        gdf = gdf.to_crs(crs)   # type: ignore
+
+    logger.info("读取完成：共 %d 条要素，CRS=%s", len(gdf), gdf.crs)
+    return gdf


 # ── GDB 专用 ───────────────────────────────────────────────────────────────────