初始化

This commit is contained in:
2026-04-22 12:27:49 +08:00
commit 4857cb6e45
73 changed files with 20927 additions and 0 deletions

View File

View File

@@ -0,0 +1,26 @@
import arcpy
import pandas as pd
def read_arcgis_table(table_path):
    """Load an ArcGIS table into a pandas DataFrame.

    All column names are upper-cased. A ``temp_area`` column is added that
    holds ``AREA * 0.0015`` rounded to 4 decimals (per the original author's
    note this converts the area to mu). An ``OID@`` column, if present, is
    dropped.

    :param table_path: path of the ArcGIS table to read
    :return: the resulting pandas DataFrame
    :raises KeyError: if the table has no ``AREA`` column (case-insensitive)
    """
    records = arcpy.da.TableToNumPyArray(table_path, "*")
    frame = pd.DataFrame(records)
    frame.columns = frame.columns.str.upper()
    frame["temp_area"] = (frame["AREA"] * 0.0015).round(4)
    # Drop the object-id column when it came through; it is not needed downstream.
    if 'OID@' not in frame.columns:
        return frame
    return frame.drop('OID@', axis=1)

View File

@@ -0,0 +1,291 @@
import arcpy
import numpy as np
def get_data_type(data_path):
    """Return the ArcGIS data type of a dataset.

    :param data_path: path of the dataset to inspect
    :return: ``arcpy.Describe(data_path).dataType``, or ``False`` when the
        path does not exist or describing it fails
    """
    if not arcpy.Exists(data_path):
        return False
    try:
        return arcpy.Describe(data_path).dataType
    except Exception:
        # Best effort: any describe failure is reported as "unknown" (False).
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        return False
# Known indicator prefixes mapped to their display names. Built once at import
# time instead of on every call.
_CONFIG_KEY_LABELS = {
    "AB": "有效硼", "ACU": "有效铜", "AMN": "有效锰", "AMO": "有效钼", "AS1": "有效硫", "AZN": "有效锌", "CEC": "阳离子交换量", "ECA": "交换性钙",
    "EMG": "交换性镁", "TSE": "全硒", "TN": "全氮", "TP": "全磷", "TK": "全钾", "AFE": "有效铁", "AK": "速效钾", "AP": "有效磷", "TRRZ": "土壤容重", "LSFD": "砾石丰度",
    "OM": "有机质", "FL": "粉粒含量", "NL": "黏粒含量", "SL": "砂粒含量", "PH": "土壤 pH", "YXTCHD": "有效土层厚度", "GZCHD": "耕作层厚度", "TRZD": "土壤质地", "TRZD12": "土壤质地",
    "三普PH": "三普PH", "二普PH": "二普PH", "测土PH": "测土PH", "二普-三普": "二普-三普", "测土-三普": "测土-三普", "二普-测土": "二普-测土",
}


def get_config_key(every_string: str) -> str:
    """Return the configuration key that prefixes ``every_string``.

    The text before the first underscore is looked up among the known
    indicator keys.

    :param every_string: a name such as ``"PH_2020"``
    :return: the matching key, or ``""`` when the prefix is unknown or the
        argument is not a string
    """
    try:
        prefix = every_string.split("_")[0]
        return prefix if prefix in _CONFIG_KEY_LABELS else ""
    except (AttributeError, TypeError):
        # Non-string input (None, numbers, bytes, ...) has no usable prefix.
        return ""
def parse_raster_standard(standard_str):
    """Parse a reclassification-standard string into a numeric range.

    Returns a ``(low, high)`` tuple for a single range, a list of such results
    when the string contains a comma, or ``None`` when the final plain-number
    parse fails.

    The original examples described three forms: an open-ended lower bound
    mapping to ``(value, inf)``, a two-number range mapping to ``(low, high)``,
    and ``"≤0.20"`` mapping to ``(0, 0.2)``.

    NOTE(review): in this copy of the file the marker strings tested below are
    empty (``""``); they look like characters lost in extraction. As written,
    ``"" in standard_str`` is always true, so the first single-range branch
    always runs and the later branches are unreachable. Confirm the intended
    operator characters against the real source before relying on this.
    """
    if "," in standard_str:
        # Multi-range standard: parts are separated by a comma followed by a newline.
        # NOTE(review): a comma that is NOT followed by a newline leaves the string
        # unsplit, so the recursive call receives the same string again and never
        # terminates (RecursionError).
        temp = []
        parts = standard_str.split(",\n")
        for part in parts:
            temp_part = parse_raster_standard(part)
            temp.append(temp_part)
        return temp
    # NOTE(review): empty marker literal - presumably a "greater than" symbol; confirm.
    if "" in standard_str:
        value = float(standard_str.replace("", ""))
        return (value, float('inf'))
    # NOTE(review): empty marker literal - presumably a range separator; confirm.
    elif "" in standard_str:
        parts = standard_str.split("")
        return (float(parts[0]), float(parts[1]))
    # NOTE(review): empty marker literal - presumably a "less than or equal" symbol; confirm.
    elif "" in standard_str:
        value = float(standard_str.replace("", ""))
        return (0, value)
    else:
        # Fall back to parsing the whole string as a single number.
        try:
            value = float(standard_str)
            return (value, value)
        except ValueError:
            return None
def create_remap_table(standards_dict):
    """Build a reclassification remap table from a standards mapping.

    Each entry of ``standards_dict`` (grade name -> standard string) is parsed
    with ``parse_raster_standard``. The i-th entry (0-based) gets class value
    ``i + 1``; when a standard parses to several ranges, the m-th range
    (0-based) gets ``m * len(standards_dict) + i + 1``, so every additional
    range of a grade lands in its own block of class values.

    :param standards_dict: non-empty dict of grade name -> standard string
    :return: list of ``[old_min, old_max, new_value]`` rows; ``[]`` (with a
        printed warning) when the argument is empty or not a dict. Standards
        or sub-ranges that cannot be parsed are skipped.
    """
    if not standards_dict or not isinstance(standards_dict, dict):
        print("警告: 没有有效的标准数据")
        return []

    standards_length = len(standards_dict)
    remap_table = []
    for i, value in enumerate(standards_dict.values()):
        parsed = parse_raster_standard(value)
        if not parsed:
            continue
        if isinstance(parsed, list):
            for m, item in enumerate(parsed):
                if not item:
                    # An unparseable sub-range is skipped, like an unparseable
                    # single-range standard.
                    continue
                # Computed directly rather than read from a 2*n lookup list, so
                # a standard with more than two ranges no longer raises IndexError.
                remap_table.append([item[0], item[1], m * standards_length + i + 1])
        else:
            remap_table.append([parsed[0], parsed[1], i + 1])
    return remap_table
def check_fields_exist_describe(feature_class, field_names):
    """Check via ``arcpy.Describe`` that every named field exists.

    :param feature_class: feature class to describe
    :param field_names: iterable of field names that must all be present
    :return: ``True`` when all names are present, ``False`` when at least one
        is missing, ``None`` (after printing the error) when the check fails
    """
    try:
        present = [fld.name for fld in arcpy.Describe(feature_class).fields]
        return all(wanted in present for wanted in field_names)
    except Exception as err:
        print(f"检查字段时出错: {err}")
        return None
def get_grade_by_standard(value, grade_standards):
    """Return the name of the first grade whose standard contains ``value``.

    Grades are tested in the mapping's own (insertion) order. The previous
    implementation sorted the items by their position in the key list, which
    reproduces exactly that order at quadratic cost.

    :param value: numeric value to classify, or ``None``
    :param grade_standards: dict of grade name -> standard string
    :return: the matching grade name; ``"无数据"`` when ``value`` is ``None``;
        ``"超出范围"`` when no grade matches
    """
    if value is None:
        return "无数据"
    for grade_name, grade_standard in grade_standards.items():
        if is_value_in_grade(value, grade_standard):
            return grade_name
    return "超出范围"
def is_value_in_grade(value, grade_standard):
    """Tell whether ``value`` satisfies a grade standard string.

    A standard may hold several comma-separated ranges (the original comment
    cites pH as an example); the value matches if it falls in any of them.
    Each range is evaluated by ``is_value_in_single_range``.
    """
    # Normalise special operator characters to ASCII '>', '<=' and '~'.
    # NOTE(review): the three search strings are empty in this copy of the file,
    # most likely characters lost in extraction. As written, replace('', x)
    # inserts x between every character. Confirm the intended source characters.
    grade_standard = grade_standard.replace('', '>').replace('', '<=').replace('', '~')
    # Multi-range standard: any matching range is enough.
    if ',' in grade_standard:
        ranges = grade_standard.split(',')
        for range_str in ranges:
            if is_value_in_single_range(value, range_str.strip()):
                return True
        return False
    else:
        return is_value_in_single_range(value, grade_standard)
def is_value_in_single_range(value, range_str):
    """Tell whether ``value`` lies inside one normalised range string.

    Supported forms (ASCII operators):

    * ``">a~b"`` or ``"a~b"``: ``a < value <= b``
    * ``">a"``: ``value > a``
    * ``"<=a"``: ``value <= a`` (takes precedence over ``~`` when no ``>`` is present)
    * anything else: ``str(value) == range_str``

    :param value: numeric value to test
    :param range_str: range expression
    :return: ``True`` when the value is inside the range
    :raises IndexError: when an operator form lacks the number(s) it needs
    """
    import re

    # The sign applies to integers as well as decimals; the earlier pattern
    # ``[-+]?\d*\.\d+|\d+`` silently read "<=-1" as 1.
    numbers = [float(num) for num in re.findall(r'[-+]?(?:\d*\.\d+|\d+)', range_str)]

    has_gt = '>' in range_str
    has_le = '<=' in range_str
    has_tilde = '~' in range_str

    if has_tilde and (has_gt or not has_le):
        return numbers[0] < value <= numbers[1]
    if has_gt:
        return value > numbers[0]
    if has_le:
        return value <= numbers[0]
    # No recognised operator: fall back to a plain string comparison.
    return str(value) == range_str
def vectorized_grade_assignment(values, grade_standards):
    """Assign a grade code to every element of ``values`` with ``np.select``.

    Each grade (taken in the mapping's insertion order, numbered from 1) may
    list comma-separated ranges. The first range of grade ``i`` gets code
    ``i``; the second range gets code ``i + len(grade_standards)``. Ranges
    beyond the second are ignored. All first ranges are tested before any
    second range, and elements matching nothing get ``"超出范围"``.

    :param values: array-like of numbers; string/object arrays are cast to float
    :param grade_standards: dict of grade name -> standard string
    :return: the array produced by ``np.select``
    """
    # Make sure comparisons run on numbers, not on strings/objects.
    if isinstance(values, np.ndarray) and values.dtype.kind in 'OUS':
        values = values.astype(float)
    elif hasattr(values, 'dtype') and values.dtype == object:
        values = values.astype(float)

    grade_count = len(grade_standards)
    upper_ranges = []
    lower_ranges = []
    # dict order already is the grade order; the former sort by key position
    # reproduced it at quadratic cost.
    for i, ranges in enumerate(grade_standards.values(), 1):
        range_list = [r.strip() for r in ranges.split(',')]
        upper_ranges.append((i, range_list[0]))
        if len(range_list) >= 2:
            # Second range of a grade: offset its code by the number of grades.
            lower_ranges.append((i + grade_count, range_list[1]))

    coded_ranges = upper_ranges + lower_ranges
    conditions = [create_condition(values, standard) for _, standard in coded_ranges]
    choices = [code for code, _ in coded_ranges]
    return np.select(conditions, choices, default="超出范围")
def create_condition(values, grade_standard):
    """Build a numpy boolean condition for a grade standard string.

    A comma-separated standard is turned into the element-wise OR of the
    conditions of its non-empty parts; a single range is delegated to
    ``create_single_condition``. Returns ``None`` when the string contains a
    comma but every part is empty.
    """
    # Clean the string: normalise special operator characters, then drop
    # newlines and spaces.
    # NOTE(review): the first three search strings are empty in this copy of the
    # file, most likely characters lost in extraction. As written, replace('', x)
    # inserts x between every character. Confirm the intended source characters.
    grade_standard = (grade_standard.replace('', '>')
                      .replace('', '<=')
                      .replace('', '~')
                      .replace('\n', '')  # remove newlines
                      .replace(' ', ''))  # remove spaces
    if ',' in grade_standard:
        # Multi-range standard: OR the per-range conditions together.
        ranges = grade_standard.split(',')
        condition = None
        for range_str in ranges:
            if range_str:  # skip empty parts
                range_condition = create_single_condition(values, range_str.strip())
                if condition is None:
                    condition = range_condition
                else:
                    condition = condition | range_condition
        return condition
    else:
        return create_single_condition(values, grade_standard)
def create_single_condition(values, range_str):
    """Build the element-wise condition for one normalised range string.

    Supported forms (ASCII operators):

    * ``">a<=b"``, ``">a~b"`` or ``"a~b"``: ``(values > a) & (values <= b)``
    * ``">a"``: ``values > a``
    * ``"<=a"``: ``values <= a``
    * a bare number: ``values == number``

    :param values: numpy array (or compatible) of numbers
    :param range_str: range expression
    :return: boolean mask with the shape of ``values``
    :raises ValueError: when no number can be extracted, or the string has no
        operator and is not a single number
    :raises IndexError: when a two-bound form holds only one number
    """
    import re

    # The sign applies to integers as well as decimals; the earlier pattern
    # ``[-+]?\d*\.\d+|\d+`` silently read "<=-1" as 1.
    numbers = [float(num) for num in re.findall(r'[-+]?(?:\d*\.\d+|\d+)', range_str)]
    if not numbers:
        raise ValueError(f"无法从字符串 '{range_str}' 中提取数字")

    has_le = '<=' in range_str
    has_tilde = '~' in range_str

    if '>' in range_str:
        if has_le or has_tilde:
            return (values > numbers[0]) & (values <= numbers[1])
        return values > numbers[0]
    if has_le:
        return values <= numbers[0]
    if has_tilde:
        return (values > numbers[0]) & (values <= numbers[1])

    # No operator at all: the string must be a single number.
    try:
        return values == float(range_str)
    except ValueError as err:
        raise ValueError(f"无法解析的范围字符串: '{range_str}'") from err

View File

@@ -0,0 +1,59 @@
# utils/excel_utils.py
import re
from openpyxl.styles import Font, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.worksheet import Worksheet
class ExcelStyleUtils:
    """Helpers for styling openpyxl worksheets."""

    @staticmethod
    def set_style(
            ws: Worksheet,
            cell_range: str,
            font: Font = Font(name='宋体', size=11),
            align: Alignment = Alignment(horizontal='center', vertical='center', wrap_text=True),
            border: Border = Border(left=Side(style='thin'), right=Side(style='thin'),
                                    top=Side(style='thin'), bottom=Side(style='thin'))):
        """Apply font, alignment and border to every cell of ``cell_range``.

        :param ws: worksheet to modify
        :param cell_range: range reference passed to ``ws[...]``; nothing is
            done when it is empty. NOTE(review): the nested iteration assumes
            ``ws[cell_range]`` yields rows of cells (e.g. ``"A1:C3"``) - confirm
            callers never pass a single cell or a single column.
        :param font: font to assign (default: 宋体, size 11)
        :param align: alignment to assign (default: centred, wrapped)
        :param border: border to assign (default: thin on all four sides)
        """
        if not cell_range:
            return
        for row in ws[cell_range]:
            for cell in row:
                cell.font = font
                cell.alignment = align
                cell.border = border

    @staticmethod
    def auto_adjust_column_width(ws: Worksheet):
        """Set each column's width from its longest non-empty cell.

        A cell's length is its character count plus 0.7 for every character
        in the range U+4E00..U+9FA5. Cells belonging to a merge that spans
        columns only (see ``get_merge_type``) are ignored. The final width is
        the longest length plus 5.
        """
        # Loop invariants: compile the pattern and read the merged ranges once,
        # not for every cell.
        cjk_pattern = re.compile('([\u4e00-\u9fa5])')
        merged_ranges = list(ws.merged_cells.ranges)

        dims = {}
        for row in ws.rows:
            for cell in row:
                if not cell.value:
                    continue
                owner = next((rng for rng in merged_ranges if cell.coordinate in rng), None)
                if get_merge_type(owner) == 'column':
                    continue
                text = str(cell.value)
                cell_len = 0.7 * len(cjk_pattern.findall(text)) + len(text)
                dims[cell.column] = max(dims.get(cell.column, 0), cell_len)

        # Apply the collected widths.
        for col, value in dims.items():
            ws.column_dimensions[get_column_letter(int(col))].width = value + 5
# 判断单元格类型
def get_merge_type(merged_range):
    """Classify a merged cell range by the directions it spans.

    :param merged_range: object exposing ``min_row``, ``max_row``, ``min_col``
        and ``max_col``, or a falsy value for "not merged"
    :return: ``'row'`` when it spans several rows only, ``'column'`` when it
        spans several columns only, ``'both'`` when it spans both, ``None``
        when it is falsy or covers a single cell
    """
    if not merged_range:
        return None
    spans_rows = bool(merged_range.max_row > merged_range.min_row)
    spans_cols = bool(merged_range.max_col > merged_range.min_col)
    kinds = {
        (True, True): 'both',
        (True, False): 'row',
        (False, True): 'column',
    }
    # (False, False) is a one-cell "range", i.e. not really merged.
    return kinds.get((spans_rows, spans_cols))

View File

@@ -0,0 +1,147 @@
from typing import List, Union
import numpy as np
# 解决百分比相加不为100%
def fix_percentages(values: List[float], total: float) -> List[float]:
    """Convert parts of a total into 2-decimal percentages that sum to 100.

    Every share is floored to two decimals; the missing hundredths are then
    handed out, one per element, to the elements with the largest truncated
    remainders (largest-remainder method).

    Redistribution only happens when ``values`` add up to (about) ``total``.
    If the floored shares already exceed 100, or the shortfall is larger than
    one hundredth per element, the floored shares are returned unchanged.

    Args:
        values (list[float]): the parts.
        total (float): the whole the parts are measured against.

    Returns:
        list[float]: percentages rounded to two decimals (plain Python floats).

    Raises:
        ZeroDivisionError: if ``total`` is 0 and the values are Python numbers.

    Examples:
        >>> fix_percentages([0.2, 0.3, 0.5], 1)
        [20.0, 30.0, 50.0]
        >>> fix_percentages([1, 1, 1], 3)
        [33.34, 33.33, 33.33]
    """
    exact = [v / total * 100 for v in values]
    # Floor to two decimals.
    floor = [float(np.floor(p * 100)) / 100 for p in exact]
    # Hundredths of a percent still missing from 100.00.
    to_distribute = int(round(10000 - sum(floor) * 100))

    fixed = list(floor)
    # Guard: an empty list or a shortfall exceeding one step per element used
    # to index past the end of the priority list.
    if 0 < to_distribute <= len(fixed):
        remainders = [e - f for e, f in zip(exact, floor)]
        by_remainder = sorted(range(len(fixed)), key=remainders.__getitem__, reverse=True)
        for idx in by_remainder[:to_distribute]:
            fixed[idx] += 0.01
    return [round(p, 2) for p in fixed]
# === 误差矫正 ===
def correct_rounding_error(target_total: Union[int, float], adjusted_areas: List[float], original_areas: List[float]) -> List[int]:
    """Round areas to integers and nudge them so they sum to ``target_total``.

    Each adjusted area is rounded with Python's built-in ``round`` (ties go to
    the even integer). The difference to ``int(target_total)`` is then removed
    one unit at a time, walking the elements in priority order and wrapping
    around if more than one pass is needed:

    * sum too small: +1, elements with the largest fractional part of their
      *original* area first; elements whose rounded value is negative are skipped
    * sum too large: -1, elements with the smallest fractional part first;
      elements already at 0 or below are skipped, so no area turns negative

    Previously the priority order was recomputed identically on each step, so
    a positive error of k was added k times to the same element.

    Args:
        target_total (int/float): desired sum; truncated with ``int``.
        adjusted_areas (list[float]): scaled areas to be rounded.
        original_areas (list[float]): areas before scaling; their fractional
            parts define the priority. Must be as long as ``adjusted_areas``.

    Returns:
        list[int]: corrected integer areas. At most ``len(adjusted_areas) * 10``
        unit adjustments are made; if an error remains, a warning is printed
        and the closest result is returned.

    Notes:
        Any exception during correction is printed and the plain rounded
        values are returned instead.

    Examples:
        >>> correct_rounding_error(10, [3.2, 2.8, 4.1], [3.2, 2.8, 4.1])
        [3, 3, 4]
        >>> correct_rounding_error(12, [3.2, 3.3, 3.4], [3.2, 3.3, 3.4])
        [4, 4, 4]
        >>> correct_rounding_error(10, [3.6, 2.7, 3.8], [3.6, 2.7, 3.8])
        [3, 3, 4]
    """
    try:
        target_total = int(target_total)
        rounded_areas = [int(round(area)) for area in adjusted_areas]
        remaining_error = target_total - sum(rounded_areas)
        if remaining_error == 0 or len(adjusted_areas) == 0:
            return rounded_areas

        # The priority never changes while correcting, so compute it once.
        decimal_parts = [float(area - int(area)) for area in original_areas]
        step = 1 if remaining_error > 0 else -1
        order = sorted(
            range(len(adjusted_areas)),
            key=lambda i: decimal_parts[i],
            reverse=(step == 1),
        )

        max_adjustments = len(adjusted_areas) * 10  # hard stop against endless looping
        adjustments_made = 0
        while remaining_error != 0 and adjustments_made < max_adjustments:
            progressed = False
            for idx in order:
                if remaining_error == 0 or adjustments_made >= max_adjustments:
                    break
                current = rounded_areas[idx]
                adjustable = current >= 0 if step == 1 else current > 0
                if not adjustable:
                    continue
                rounded_areas[idx] = current + step
                remaining_error -= step
                adjustments_made += 1
                progressed = True
            if not progressed:
                # No element can take another unit.
                break

        if remaining_error != 0:
            print(f"警告:无法完全矫正误差,剩余: {remaining_error}")
        return rounded_areas
    except Exception as e:
        print(f"误差矫正出错: {e}")
        # Fall back to the plain rounded values.
        return [int(round(area)) for area in adjusted_areas]
if __name__ == '__main__':
    # Manual smoke run. The adjusted values round to 3 + 4 + 4 = 11, so with a
    # target of 10 one unit has to be taken away again.
    demo_adjusted = [3.3, 3.9, 4.2]
    demo_original = [3.25, 2.85, 4.15]
    print(correct_rounding_error(10, demo_adjusted, demo_original))

View File

@@ -0,0 +1,47 @@
import gc
import sys
import arcpy
# 临时文件清理
def clean_up_temp_files(temp_files, workspace=None):
    """Best-effort removal of temporary datasets and the in_memory workspace.

    Never raises: every failure is reported on ``sys.stderr`` with a
    ``CleanupError:`` prefix and cleanup continues.

    Steps:
      1. delete each existing item of ``temp_files``;
      2. if ``in_memory`` exists, switch to it and delete its datasets,
         feature classes and rasters;
      3. if ``workspace`` is given and exists, make it the current workspace
         and clear the workspace cache; otherwise put back the workspace that
         was current on entry (previously ``arcpy.env.workspace`` was left
         pointing at ``in_memory``);
      4. force a garbage collection.

    :param temp_files: iterable of dataset paths to delete (may be empty/None)
    :param workspace: workspace to restore afterwards (optional)
    """
    try:
        previous_workspace = arcpy.env.workspace

        for temp_file in temp_files or ():
            if not arcpy.Exists(temp_file):
                continue
            try:
                arcpy.management.Delete(temp_file)
            except Exception as delete_err:
                sys.stderr.write(f"CleanupError:无法删除临时文件 {temp_file}: {str(delete_err)}\n")

        # Clean inside the in_memory workspace so that only its own items are
        # listed and deleted, not same-named items elsewhere.
        try:
            if arcpy.Exists("in_memory"):
                arcpy.env.workspace = "in_memory"
                # Guard each List* result with "or []" in case it comes back as
                # None, so one empty listing does not abort the whole cleanup.
                memory_items = (
                    (arcpy.ListDatasets() or [])
                    + (arcpy.ListFeatureClasses() or [])
                    + (arcpy.ListRasters() or [])
                )
                for item in memory_items:
                    try:
                        arcpy.management.Delete(item)
                    except Exception as delete_mem_item_err:
                        sys.stderr.write(f"CleanupError:无法清理内存项 in_memory/{item}: {str(delete_mem_item_err)}\n")
        except Exception as delete_in_memory_err:
            sys.stderr.write(f"CleanupError:清理 in_memory 工作空间时发生错误: {str(delete_in_memory_err)}\n")

        # Restore the caller's workspace.
        if workspace and arcpy.Exists(workspace):
            try:
                arcpy.env.workspace = workspace
                arcpy.management.ClearWorkspaceCache()
            except Exception as restore_ws_err:
                sys.stderr.write(f"CleanupError:无法恢复原始工作空间 {workspace}: {str(restore_ws_err)}\n")
        else:
            arcpy.env.workspace = previous_workspace
    except Exception as cleanup_err:
        # Outer safety net.
        sys.stderr.write(f"CleanupError:清理临时文件过程中发生未预料的错误: {str(cleanup_err)}\n")
    # Force a garbage collection.
    gc.collect()

View File

@@ -0,0 +1,201 @@
# 获取每个一级地类面积主要是12类
import arcpy
import numpy as np
import pandas as pd
from .math_utils import correct_rounding_error
# 获取目标面积
def get_area_by_group(dltb_class_feature, excel_target_path, xzqmc, is_by_xzq=False):
    """Collect the target area per first-level land-use class.

    With ``is_by_xzq=True`` the Excel sheet is returned as a nested dict
    ``{district: {column: value}}`` after renaming the land-use name columns to
    their codes ('耕地'->'01', '园地'->'02', '林地'->'03', '草地'->'04',
    '其他地类'->'12').

    Otherwise the targets start from the feature class: ``TBDLMJ`` summed per
    ``YJDLBM`` and multiplied by 0.0015. That step is skipped when the feature
    class is missing, or when class '12' holds null or zero ``TBDLMJ``. Values
    found in the Excel row of ``xzqmc`` (non-zero, non-NaN) then override the
    computed ones.

    :param dltb_class_feature: feature class with ``YJDLBM`` and ``TBDLMJ`` fields
    :param excel_target_path: Excel file with a '行政单位' column
    :param xzqmc: district name to look up in the Excel file
    :param is_by_xzq: return the per-district dict of the whole sheet instead
    :return: dict of targets; ``{}`` (after printing the error) on any failure
    """
    landuse_types = {'01': '耕地', '02': '园地', '03': '林地', '04': '草地', '12': '其他地类'}
    try:
        # Read the target areas; strip stray whitespace so column names match.
        target_df = pd.read_excel(excel_target_path)
        target_df.columns = target_df.columns.str.strip()

        if is_by_xzq:
            name_to_code = {name: code for code, name in landuse_types.items()}
            return target_df.rename(columns=name_to_code).set_index('行政单位').to_dict('index')

        # Start empty so a missing feature class still lets the Excel targets
        # through; previously the name was unbound on that path and the
        # catch-all below returned {}.
        target_areas = {}
        if not arcpy.Exists(dltb_class_feature):
            print(f"警告:输入要素类不存在: {dltb_class_feature}")
        else:
            df = pd.DataFrame(arcpy.da.TableToNumPyArray(dltb_class_feature, ["YJDLBM", "TBDLMJ"], skip_nulls=False, null_value=np.nan))
            qtdl_df = df[df['YJDLBM'] == '12']
            if qtdl_df['TBDLMJ'].isnull().any() or qtdl_df['TBDLMJ'].eq(0).any():
                print("警告其他地类TBDLMJ字段 存在空值或无效的记录,将不平差其他地类")
            else:
                target_areas = (df.groupby("YJDLBM")["TBDLMJ"].sum() * 0.0015).to_dict()

        # Targets of the requested district from the Excel sheet.
        district_row = target_df[target_df['行政单位'] == xzqmc]
        if district_row.empty:
            print(f"警告:未找到{xzqmc}的目标面积数据,将使用TBDLMJ数据进行平差")
            return target_areas

        for dlbm, dlmc in landuse_types.items():
            if dlmc not in district_row.columns:
                continue
            target_value = district_row[dlmc].values[0]
            if target_value and not np.isnan(target_value):
                target_areas[dlbm] = target_value
        return target_areas
    except Exception as e:
        print(f"计算面积时出错: {str(e)}")
        return {}
# 按地类平差(全区统一平差)
def adjust_area_statistics(stats_df, target_areas):
    """Scale the areas of every land-use class to its target total.

    For each class in ``target_areas`` that exists in ``stats_df`` with a
    positive original total and has a positive target, all ``temp_area``
    values of the class are multiplied by ``target / original_total`` and then
    integer-corrected with ``correct_rounding_error`` so they sum to the
    target. Other classes keep ``adjusted_area == temp_area`` and
    ``adjustment_factor == 1.0``.

    The guard previously used ``or``, so a target for an absent class raised
    KeyError and a zero original total caused a division by zero; either one
    aborted the whole adjustment through the catch-all.

    Parameters:
        stats_df: DataFrame with ``temp_area`` and a class column ``YJDLBM``
            (``YNDLBM`` is used when ``YJDLBM`` is absent)
        target_areas: dict of class code -> target area, or None

    Returns:
        a copy of ``stats_df`` with ``adjusted_area`` and ``adjustment_factor``
        columns; ``stats_df`` itself when ``target_areas`` is None or the
        adjustment fails
    """
    try:
        if target_areas is None:
            print("警告:目标面积数据为空,不进行平差")
            return stats_df

        adjusted_df = stats_df.copy()
        dlbm = "YJDLBM" if "YJDLBM" in adjusted_df.columns else "YNDLBM"
        adjusted_df['adjusted_area'] = adjusted_df['temp_area']
        adjusted_df['adjustment_factor'] = 1.0

        # Original total area per class.
        original_totals = stats_df.groupby(dlbm)['temp_area'].sum().to_dict()

        for yjdl, target_area in target_areas.items():
            original_total = original_totals.get(yjdl)
            # "not x > 0" also rejects NaN.
            if original_total is None or not original_total > 0 or not target_area > 0:
                continue

            adjustment_factor = target_area / original_total
            mask = adjusted_df[dlbm] == yjdl
            adjusted_df.loc[mask, 'adjusted_area'] = adjusted_df.loc[mask, 'temp_area'] * adjustment_factor
            adjusted_df.loc[mask, 'adjustment_factor'] = adjustment_factor

            # Integer correction so the class total equals the target.
            adjusted_areas = adjusted_df.loc[mask, 'adjusted_area'].tolist()
            original_areas = stats_df.loc[mask, 'temp_area'].tolist()
            corrected_areas = correct_rounding_error(target_area, adjusted_areas, original_areas)
            adjusted_df.loc[mask, 'adjusted_area'] = corrected_areas
            print(f"地类 {yjdl}: 平差系数 = {adjustment_factor:.6f}, 目标面积 = {target_area}, 矫正后总面积 = {sum(corrected_areas)}")
        return adjusted_df
    except Exception as e:
        print(f"平差处理失败: {e}")
        return stats_df
# 按行政区+地类进行平差
def adjust_by_district_landuse(stats_df: pd.DataFrame, target_areas_dict: dict):
    """Scale areas to their targets per district and land-use class.

    Parameters:
        stats_df: DataFrame with ``XZQMC``, ``YJDLBM`` and ``temp_area`` columns
        target_areas_dict: ``{district: {land-use code: target area}}``

    Returns:
        a copy of ``stats_df`` with ``adjusted_area`` and ``adjustment_factor``
        columns. ``stats_df`` itself is returned unchanged (with a printed
        warning) when a district present in the data has no entry in
        ``target_areas_dict``.

    A (district, class) pair is adjusted only when it exists in the data with a
    positive original total and its target is not missing. A NaN target (a
    blank Excel cell, for instance) is skipped rather than propagated into the
    integer correction.
    """
    adjusted_df = stats_df.copy()
    adjusted_df['adjusted_area'] = adjusted_df['temp_area']
    adjusted_df['adjustment_factor'] = 1.0

    # Every district in the data must have targets; otherwise do nothing.
    missing_districts = [
        district for district in adjusted_df['XZQMC'].unique()
        if district not in target_areas_dict
    ]
    if missing_districts:
        print(f"警告:平差数据中不存在行政区: {missing_districts},未进行平差")
        return stats_df

    # Original total area per (district, class).
    original_totals = stats_df.groupby(['XZQMC', 'YJDLBM'])['temp_area'].sum()

    for xzqmc, landuse_targets in target_areas_dict.items():
        for yjdl, target_area in landuse_targets.items():
            if pd.isna(target_area):
                continue
            if (xzqmc, yjdl) not in original_totals.index:
                continue
            original_total = original_totals.at[(xzqmc, yjdl)]
            if not original_total > 0:
                continue

            adjustment_factor = target_area / original_total
            mask = (adjusted_df['XZQMC'] == xzqmc) & (adjusted_df['YJDLBM'] == yjdl)
            adjusted_df.loc[mask, 'adjusted_area'] = adjusted_df.loc[mask, 'temp_area'] * adjustment_factor
            adjusted_df.loc[mask, 'adjustment_factor'] = adjustment_factor

            # Integer correction so the group total equals the target.
            adjusted_areas = adjusted_df.loc[mask, 'adjusted_area'].tolist()
            original_areas = stats_df.loc[mask, 'temp_area'].tolist()
            corrected_areas = correct_rounding_error(target_area, adjusted_areas, original_areas)
            adjusted_df.loc[mask, 'adjusted_area'] = corrected_areas
            print(f"{xzqmc} - 地类 {yjdl}: 平差系数 = {adjustment_factor:.6f}, 目标面积 = {target_area}, 矫正后总面积 = {sum(corrected_areas)}")
    return adjusted_df
def get_target_areas(excel_path: str, sheet_name: str, xzqmc: str) -> pd.DataFrame:
    """Return one district's target areas from an Excel sheet in long form.

    The first row whose '行政单位' equals ``xzqmc`` is turned into a two-column
    DataFrame: ``EJDL`` (the former column names) and ``面积`` (the values).

    :raises IndexError: when no row matches ``xzqmc``
    """
    sheet = pd.read_excel(excel_path, sheet_name)
    is_district = sheet['行政单位'] == xzqmc
    first_match = sheet[is_district].set_index('行政单位').iloc[0]
    long_form = first_match.reset_index(name='面积')
    return long_form.rename(columns={'index': 'EJDL'})
def get_target_areas_by_group(excel_target_path):
    """Read sheet "Sheet1" and return ``{district: {column: value}}``.

    Column names are stripped of surrounding whitespace before the
    '行政单位' column is used as the key.
    """
    sheet = pd.read_excel(excel_target_path, "Sheet1")
    # Strip stray whitespace so the column names match.
    sheet.columns = sheet.columns.str.strip()
    by_district = sheet.set_index('行政单位')
    return by_district.to_dict('index')