求助：Python多进程调用百度地图API获取数据，比单进程慢，为什么啊？（附代码）

多进程调用百度地图 api，先获取经纬度，然后利用经纬度获取过路费，保存到 excel （获取一条存一条），因为想加快速度，所以写了多进程，但是经过测试，多进程比单进程还要慢，想请教下为什么啊？附代码（我的 ak 隐藏了）：

单进程代码：

import requests
from openpyxl import load_workbook
import time
获取经纬度
def geocode(address):
base = url = “http://api.map.baidu.com/geocoder/v2/?address=” + address + “&output=json&ak=”
response = requests.get(base)
answer = response.json()
return answer[‘result’][‘location’]
获取过路费
def get(origin_lat,origin_lng,destination_lat,destination_lng):
base = url = “http://api.map.baidu.com/direction/v2/driving?origin=” + str(origin_lng) + “,” + str(origin_lat) +  “&destination=” 

+ str(destination_lng)+","+str(destination_lat) + “&output=json&ak=”
response = requests.get(base)
answer = response.json()
info = [answer[‘result’][‘routes’][0][‘duration’]/60,answer[‘result’][‘routes’][0][‘distance’]/1000,answer[‘result’][‘routes’][0][‘toll’]]
return info
if name==‘main’:
start = time.clock()
data = load_workbook(r"ODdata.xlsx")
table = data.get_sheet_by_name(‘locationcode’)
nrows = table.max_row
ncols = table.max_column
origin_table = data.get_sheet_by_name('OD')
origin_nrows = origin_table.max_row
origin_ncols = origin_table.max_column
go_outset = []
go_destination = []
for r in range(2,nrows+1):
    go_outset.append(table.cell(row=r,column=2).value) #生成去程出发地列表
for r in range(2,nrows+1):
    go_destination.append(table.cell(row=r,column=5).value) #生成去程目的地列表

go_outset_count=1
go_destination_count=1
go_outset_locationcode = []
for i in go_outset:
    try:
        go_outset_locationcode.append(geocode(i)) #生成去程出发地经纬度
        print("出发地经纬度查询计数%d"%go_outset_count)
        go_outset_count+=1
    except:
        go_outset_locationcode.append({'lat':'wrong','lng':'wrong'})

go_destination_locationcode = []
for i in go_destination:
    try:
        go_destination_locationcode.append(geocode(i))#生成去程目的地经纬度
        print("目的地经纬度查询计数%d" % go_destination_count)
        go_destination_count+=1
    except:
        go_destination_locationcode.append({'lat':'wrong','lng':'wrong'})

go_outset_locationcodelist = []
go_destination_locationcodelist = []

for i in range(len(go_outset_locationcode)):
    go_outset_locationcodelist.append(go_outset_locationcode[i].values())

for i in range(len(go_destination_locationcode)):
    go_destination_locationcodelist.append(go_destination_locationcode[i].values())

#将经纬度和省份写入 excel
for i in range(2,nrows+1):
    for j in range(3,5):
        _ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
for i in range(2,nrows+1):
    for j in range(6,8):
        _ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
data.save(r"ODdata.xlsx")

#获取过路费
info = []
go_count=1
for i in range(0,len(go_outset)):
    if list(go_outset_locationcodelist[i])[0]=='wrong':
        continue
    else:
        try:
            info.append(get(list(go_outset_locationcodelist[i])[0],list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0],list(go_destination_locationcodelist[i])[1]))
            print("过路费查询计数%d" % go_count)
            go_count+=1
        except:
            info.append(['wrong','wrong','wrong'])
            print("错误行数是%d"%i)
        finally:
            for j in range(8,11):
                _ = origin_table.cell(column=j, row=i+3, value=info[i][j - 8])
            data.save(r"ODdata.xlsx")

elapsed = (time.clock() - start)
print("Time used:", elapsed)

多进程

import requests
from openpyxl import load_workbook
import multiprocessing
from multiprocessing import Lock,Pool
import time
获取经纬度
def geocode(address):
base = url = “http://api.map.baidu.com/geocoder/v2/?address=” + address + “&output=json&ak=”
response = requests.get(base)
answer = response.json()
return answer[‘result’][‘location’]
保存到 excel
def save(info):
# data_new = load_workbook(r"ODdata.xlsx")
# origin_table_new = data_new[‘OD’]
for j in range(8, 11):
_ = origin_table.cell(column=j, row=i + 3, value=info[j-8])
data.save(r"ODdata.xlsx")
print(“第%d 行保存成功” % (i + 1))
获取过路费
def getall(i,origin_lat,origin_lng,destination_lat,destination_lng):
try:
base = url = “http://api.map.baidu.com/direction/v2/driving?origin=” + str(origin_lng) + “,” + str(
origin_lat) + “&destination=” 

+ str(destination_lng) + “,” + str(
destination_lat) + “&output=json&ak=”
response = requests.get(base)
answer = response.json()
info = [answer[‘result’][‘routes’][0][‘duration’] / 60, answer[‘result’][‘routes’][0][‘distance’] / 1000,
answer[‘result’][‘routes’][0][‘toll’]]
print(“过路费查询成功，第%d 行” % (i+1))
except:
info=[‘wrong’, ‘wrong’, ‘wrong’]
print(“过路费查询失败，第%d 行” % (i+1))
# finally:
#     for j in range(8, 11):
#         _ = origin_table.cell(column=j, row=i + 3, value=info[j - 8])
#     data.save(r"ODdata.xlsx")
#     print(“第%d 行保存成功” % (i + 1))
return info
if name==‘main’:
start = time.clock()
data = load_workbook(r"ODdata.xlsx")
table = data['locationcode']
nrows = table.max_row
ncols = table.max_column

origin_table = data['OD']
origin_nrows = origin_table.max_row
origin_ncols = origin_table.max_column
go_outset = []
go_destination = []
for r in range(2,nrows+1):
    go_outset.append(table.cell(row=r,column=2).value) #生成去程出发地列表
for r in range(2,nrows+1):
    go_destination.append(table.cell(row=r,column=5).value) #生成去程目的地列表

go_outset_count=1
go_destination_count=1
go_outset_locationcode = []
for i in go_outset:
    try:
        go_outset_locationcode.append(geocode(i)) #生成去程出发地经纬度
        print("出发地经纬度查询计数%d"%go_outset_count)
        go_outset_count+=1
    except:
        go_outset_locationcode.append({'lat':'wrong','lng':'wrong'})

go_destination_locationcode = []
for i in go_destination:
    try:
        go_destination_locationcode.append(geocode(i))#生成去程目的地经纬度
        print("目的地经纬度查询计数%d" % go_destination_count)
        go_destination_count+=1
    except:
        go_destination_locationcode.append({'lat':'wrong','lng':'wrong'})

go_outset_locationcodelist = []
go_destination_locationcodelist = []

for i in range(len(go_outset_locationcode)):
    go_outset_locationcodelist.append(go_outset_locationcode[i].values())

for i in range(len(go_destination_locationcode)):
    go_destination_locationcodelist.append(go_destination_locationcode[i].values())

#将经纬度和省份写入 excel
for i in range(2,nrows+1):
    for j in range(3,5):
        _ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
for i in range(2,nrows+1):
    for j in range(6,8):
        _ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
data.save(r"ODdata.xlsx")

#开启多进程，获取过路费
for i in range(0,len(go_outset)):
    if list(go_outset_locationcodelist[i])[0]=='wrong':
        continue
    else:
        pool = multiprocessing.Pool(processes=5)
        pool.apply_async(getall,(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],),callback=save)
        # pool.apply(getall, (i, list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
        #                           list(go_destination_locationcodelist[i])[0],
        #                           list(go_destination_locationcodelist[i])[1],))
        # p = multiprocessing.Process(target=save, args=(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
        #                 list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],lock))
        # p.start()
        pool.close()
        pool.join()

elapsed = (time.clock() - start)
print("Time used:", elapsed)

求助：Python多进程调用百度地图API获取数据，比单进程慢，为什么啊？（附代码）

htzhanglong 1楼

大神能帮我看一下这个问题吗？

nodeper 2楼

我看了你的代码，问题出在进程间通信开销上。你用multiprocessing.Queue在主进程和子进程之间传递数据，这个队列的序列化/反序列化开销很大，特别是你每次只传一个地址进去。

改成用multiprocessing.Pool的map方法，让进程池自己处理任务分配，这样效率高得多：

import requests
import pandas as pd
from multiprocessing import Pool, cpu_count
import time

def get_location(addr):
    """获取单个地址的经纬度"""
    try:
        url = f"http://api.map.baidu.com/geocoding/v3/?address={addr}&output=json&ak=你的AK"
        response = requests.get(url, timeout=5)
        data = response.json()
        
        if data['status'] == 0:
            result = data['result']
            return {
                '地址': addr,
                '经度': result['location']['lng'],
                '纬度': result['location']['lat']
            }
        else:
            return {'地址': addr, '经度': None, '纬度': None}
    except Exception as e:
        return {'地址': addr, '经度': None, '纬度': None}

def main():
    # 读取地址数据
    df = pd.read_excel('addresses.xlsx')
    addresses = df['地址'].tolist()
    
    print(f"开始处理 {len(addresses)} 个地址...")
    start_time = time.time()
    
    # 创建进程池，进程数设为CPU核心数
    with Pool(processes=cpu_count()) as pool:
        # 使用map并行处理
        results = pool.map(get_location, addresses)
    
    # 转换为DataFrame
    result_df = pd.DataFrame(results)
    
    # 保存结果
    result_df.to_excel('results.xlsx', index=False)
    
    end_time = time.time()
    print(f"处理完成！耗时：{end_time - start_time:.2f}秒")
    print(f"成功获取：{result_df['经度'].notna().sum()} 条数据")

if __name__ == '__main__':
    main()

关键改动：

去掉Queue，直接用pool.map分配任务
每个进程独立完成API调用和数据处理
结果直接返回，由主进程收集

这样改之后，多进程速度应该能明显提升。如果还慢，可能是百度API的限流导致的，可以适当加些延迟。

总结：用进程池的map代替手动队列管理。

itying888 3楼

自己来回复一下：明白问题出在哪里了就是我在创建和阻塞进程池的时候卸载了循环内应该卸载循环外才对

ionicwang 4楼

写在