nc数据补值
我的NC数据的右上角缺失值,准备使用IDW插值进行补充。上图为补值前,下图为补值后:
整体逻辑为:
代码:
import numpy as npfrom netCDF4 import Datasetfrom scipy.spatial import cKDTreeimport osdef fill_missing_idw (variable, lat, lon, power=2 , max_neighbors=8 ) : """ 使用IDW方法填补二维数组中的缺失值。 参数: - variable: 2D numpy数组,可能包含缺失值 (masked array) - lat, lon: 2D numpy数组,与variable形状相同的纬度和经度 - power: IDW的幂参数 - max_neighbors: 使用的最大邻居数量 返回: - filled_variable: 填补后的二维numpy数组 """ filled_variable = variable.copy() ny, nx = variable.shape if isinstance(variable, np.ma.MaskedArray): known_mask = ~variable.mask missing_mask = variable.mask known_values = variable.data[known_mask] else : known_mask = ~np.isnan(variable) missing_mask = np.isnan(variable) known_values = variable[known_mask] known_points = np.column_stack((lon[known_mask], lat[known_mask])) missing_points = np.column_stack((lon[missing_mask], lat[missing_mask])) if len(known_points) == 0 : print("没有可用的已知点进行插值。" ) return filled_variable tree = cKDTree(known_points) distances, indexes = tree.query(missing_points, k=max_neighbors, n_jobs=-1 ) if max_neighbors == 1 : distances = distances[:, np.newaxis] indexes = indexes[:, np.newaxis] with np.errstate(divide='ignore' ): weights = 1 / distances**power weights[~np.isfinite(weights)] = 0 interpolated_values = np.sum(weights * known_values[indexes], axis=1 ) / np.sum(weights, axis=1 ) exact_matches = distances[:,0 ] == 0 interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0 ]] filled_variable[missing_mask] = interpolated_values # 调试信息 n_filled = np.sum(~filled_variable[missing_mask].mask) if isinstance(filled_variable, np.ma.MaskedArray) else np.sum(~np.isnan(filled_variable[missing_mask])) print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。" ) return filled_variabledef fill_missing_idw (variable, lat, lon, power=2 , max_neighbors=8 ) : """ 使用IDW方法填补二维数组中的缺失值。 参数: - variable: 2D numpy数组,可能包含缺失值 (masked array) - lat, lon: 2D numpy数组,与variable形状相同的纬度和经度 - power: IDW的幂参数 - max_neighbors: 使用的最大邻居数量 返回: - filled_variable: 填补后的二维numpy数组 """ filled_variable = variable.copy() ny, nx = variable.shape if isinstance(variable, np.ma.MaskedArray): known_mask = ~variable.mask missing_mask = variable.mask known_values = variable.data[known_mask] else : known_mask = ~np.isnan(variable) missing_mask = np.isnan(variable) known_values = variable[known_mask] known_points = np.column_stack((lon[known_mask], lat[known_mask])) missing_points = np.column_stack((lon[missing_mask], lat[missing_mask])) if len(known_points) == 0 : print("没有可用的已知点进行插值。" ) return filled_variable tree = cKDTree(known_points) distances, indexes = tree.query(missing_points, k=max_neighbors) if max_neighbors == 1 : distances = distances[:, np.newaxis] indexes = indexes[:, np.newaxis] with np.errstate(divide='ignore' ): weights = 1 / distances**power weights[~np.isfinite(weights)] = 0 # 处理距离为0的情况 # 计算权重和 weight_sums = np.sum(weights, axis=1 ) # 避免除以零 weight_sums[weight_sums == 0 ] = 1 interpolated_values = np.sum(weights * known_values[indexes], axis=1 ) / weight_sums # 处理距离为0的情况,直接赋值 exact_matches = distances[:,0 ] == 0 if np.any(exact_matches): interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0 ]] filled_variable[missing_mask] = interpolated_values # 调试信息 n_filled = np.sum(~filled_variable[missing_mask].mask) if isinstance(filled_variable, np.ma.MaskedArray) else np.sum(~np.isnan(filled_variable[missing_mask])) print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。" ) return filled_variabledef process_nc_file (input_path, output_path) : # 打开 NetCDF 文件 with Dataset(input_path, 'r' ) as src: # 创建新的 NetCDF 文件 with Dataset(output_path, 'w' , format=src.file_format) as dst: # 复制全局属性 dst.setncatts({attr: src.getncattr(attr) for attr in src.ncattrs()}) # 复制维度 for dim_name, dim in src.dimensions.items(): dst.createDimension(dim_name, (len(dim) if not dim.isunlimited() else None )) # 复制变量并处理数据 for var_name, var in src.variables.items(): # 复制变量定义 fill_value = getattr(var, '_FillValue' , None ) dst_var = dst.createVariable(var_name, var.datatype, var.dimensions, fill_value=fill_value) dst_var.setncatts({attr: var.getncattr(attr) for attr in var.ncattrs()}) # 读取数据 data = var[:] # 检查是否是需要插值的变量 if var_name in ['T2D' , 'Q2D'