19-多进程学习
- 多进程学习
- 注意逗号:`apply_async` 的 `args` 必须是元组,只有一个参数时要写成 `(x,)`,末尾逗号不能省略
- 注意使用进程池
- 注意使用异步
- 注意查看任务管理器,确认多个进程是否真的跑起来了
import time
import warnings
from multiprocessing import Manager, Pool

import pandas as pd
from geopy.distance import geodesic

warnings.filterwarnings("ignore")

# Search radii in metres, smallest first. A hit inside a smaller radius is
# also counted for every larger radius.
_RADII_M = (10, 30, 50)


def _new_buckets():
    """Return one empty ``{'points_num', 'name-or-code'}`` summary per radius."""
    return {radius: {'points_num': 0, 'name-or-code': []} for radius in _RADII_M}


def _record_hit(buckets, distance_m, info):
    """Add ``info`` to every radius bucket that ``distance_m`` falls within."""
    for radius in _RADII_M:
        if distance_m <= radius:
            bucket = buckets[radius]
            bucket['points_num'] += 1
            # Each bucket gets its own dict so the buckets never share state.
            bucket['name-or-code'].append(dict(info))


def count_points(index, match_table, to_match_table1, to_match_table2, cabinet_affiliation, res):
    """Count the candidate points lying within 10/30/50 m of each row.

    For every row of ``match_table`` the geodesic distance to every row of
    ``to_match_table1`` and ``to_match_table2`` is computed, and the hits are
    summarised per radius in the columns ``80_10m``, ``80_30m`` and ``80_50m``.
    Each cell holds a dict ``{'points_num': int, 'name-or-code': list}``.

    Args:
        index: Key under which the result is stored in ``res``.
        match_table: DataFrame with numeric ``delivery_lat`` and
            ``delivery_lgt`` columns. It is not modified.
        to_match_table1: DataFrame with ``y``, ``x``, ``name`` and
            ``comp_name`` columns. ``y`` is passed to geodesic as the
            latitude and ``x`` as the longitude.
        to_match_table2: DataFrame with ``latitude``, ``longitude`` and
            ``belongorg`` columns.
        cabinet_affiliation: Mapping from a ``belongorg`` value to its label.
        res: Dict-like object (a ``Manager().dict()`` in the driver below)
            that receives the annotated copy of ``match_table``.

    Raises:
        KeyError: If a ``to_match_table2`` row within 50 m has a ``belongorg``
            value missing from ``cabinet_affiliation``.
    """
    max_radius = _RADII_M[-1]

    # Pull the candidate columns out once; per-cell DataFrame lookups inside
    # the nested loops are far slower than iterating plain tuples.
    candidates1 = list(zip(
        to_match_table1['y'],
        to_match_table1['x'],
        to_match_table1['name'],
        to_match_table1['comp_name'],
    ))
    candidates2 = list(zip(
        to_match_table2['latitude'],
        to_match_table2['longitude'],
        to_match_table2['belongorg'],
    ))

    summaries = {radius: [] for radius in _RADII_M}
    for origin_lat, origin_lon in zip(match_table['delivery_lat'], match_table['delivery_lgt']):
        origin = (origin_lat, origin_lon)  # geodesic expects (lat, lon)
        buckets = _new_buckets()

        # Candidates from to_match_table1.
        for cand_lat, cand_lon, name, comp_name in candidates1:
            distance_m = geodesic(origin, (cand_lat, cand_lon)).m
            if distance_m <= max_radius:
                _record_hit(buckets, distance_m, {'name': name, 'comp_name': comp_name})

        # Candidates from to_match_table2. The affiliation lookup happens
        # only for actual hits, so unknown codes on far-away rows are ignored.
        for cand_lat, cand_lon, belongorg in candidates2:
            distance_m = geodesic(origin, (cand_lat, cand_lon)).m
            if distance_m <= max_radius:
                _record_hit(
                    buckets,
                    distance_m,
                    {'name': 'fc', 'belongorg': cabinet_affiliation[belongorg]},
                )

        for radius in _RADII_M:
            summaries[radius].append(buckets[radius])

    # Work on a copy and assign whole columns: writing cell by cell through
    # chained indexing on a slice of the caller's frame is not reliable.
    result = match_table.copy()
    for radius in _RADII_M:
        result['80_{}m'.format(radius)] = summaries[radius]
    res[index] = result


if __name__ == '__main__':
    cabinet_affiliation = {
        "1": "a",
        "2": "b",
        "3": "c",
        "4": "d",
        "5": "e",
        "6": "f",
        "7": "g",
        "8": "h",
        "9": "i",
        "10": "j"
    }

    # Load the input data (the file paths are blank in the source).
    coord_80 = pd.read_csv(r'', dtype=str)
    to_match_1_zc = pd.read_csv(r'', sep='\t', dtype=str)
    to_match_2_fc = pd.read_csv(r'', dtype=str)

    # Add the result columns and convert the coordinate columns to numbers.
    coord_80['80_10m'] = ''
    coord_80['80_30m'] = ''
    coord_80['80_50m'] = ''
    coord_80['delivery_lgt'] = pd.to_numeric(coord_80['delivery_lgt'])
    coord_80['delivery_lat'] = pd.to_numeric(coord_80['delivery_lat'])
    to_match_1_zc['x'] = pd.to_numeric(to_match_1_zc['x'])
    to_match_1_zc['y'] = pd.to_numeric(to_match_1_zc['y'])
    to_match_2_fc['longitude'] = pd.to_numeric(to_match_2_fc['longitude'])
    to_match_2_fc['latitude'] = pd.to_numeric(to_match_2_fc['latitude'])

    # Fan the work out over a process pool.
    time_start = time.time()
    manager = Manager()
    res = manager.dict()
    pool_nums = 8   # number of chunks submitted
    pool_band = 10  # rows per chunk; only the first pool_nums * pool_band rows are processed
    p = Pool(pool_nums - 1)
    pending = []
    for i in range(pool_nums):
        index = i * pool_band
        coord_80_temp = coord_80.iloc[index:index + pool_band, :]
        pending.append(
            p.apply_async(
                count_points,
                args=(index, coord_80_temp, to_match_1_zc, to_match_2_fc, cabinet_affiliation, res,),
            )
        )
    p.close()
    p.join()

    # apply_async swallows worker exceptions unless the result is fetched.
    for task in pending:
        task.get()

    # Concatenate in chunk order; the shared dict is filled in completion order.
    chunks = [res[key] for key in sorted(res.keys())]
    df_temp = pd.concat(chunks) if chunks else pd.DataFrame()
    df_temp.to_csv(r'', index=False)
    time_end = time.time()
    print('用时{}s'.format(time_end - time_start))
赞 (0)