SCMOPTのケーススタディ 2

ロジスティックネットワーク設計

ロジスティックネットワーク設計問題を例として,データの前処理やクラスタリングの方法による違いを調べる.

まずはデータを読み込む.列名はSCMOPTにあわせて変えておく.

cust_df = pd.read_csv(folder+ "Cust.csv")
cust_df.columns= ["name","lat","lon"]
cust_df["lat"] = cust_df["lat"].astype(float)
cust_df["lon"] = cust_df["lon"].astype(float)
#cust_df.head()
fig = plot_cust(cust_df)
plotly.offline.plot(fig);
total_demand_df = pd.read_csv(folder+ "Cust-Prod.csv")
total_demand_df.columns=["cust","prod","demand"]
#total_demand_df.head()
dc_df = pd.read_csv(folder+ "DC.csv")
dc_df.columns=["name","fc","vc","lb","ub","lat","lon"]
dc_df.lb = 0.
#dc_df.head()
plnt_df = pd.read_csv(folder+ "Plnt.csv")
plnt_df.columns=["name","lat","lon"]
#plnt_df.head()
prod_df = pd.read_csv(folder+ "Prod.csv")
prod_df.columns=["name","weight"]
#prod_df
plnt_prod_df = pd.read_csv(folder+"Plnt-Prod.csv")
plnt_prod_df.columns=["plnt","prod","ub"]
#plnt_prod_df.head()

同一座標の顧客

同じ緯度・経度をもつ地点が多数存在する.これは,K-d木を用いると容易に確認できる. ここでは,そのままのデータで解くが,k-メディアン法は,同じ地点があると2つともクラスターの中心(メディアン)として選択してしまうので,注意を要する.

df = enumerate_near_points(cust_df, 0.0001)
#df

地点間の道路距離と移動時間を計算

durations, distances, node_df = compute_durations(cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(cust_df, dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 200. )
# 大圏距離で代用する場合には,以下を生かす
#trans_df, graph, position  = make_network(cust_df, dc_df, plnt_df, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )

需要の分布を確認

需要の分布を確認すると,ほとんどが2000以下なので,除外する. 0の需要だけを除外した場合は,数値のばらつきがあまりにも大きいために,Gurobiでさえ不安定になり,解くことができない(もしくは非常に時間がかかる).

fig = px.histogram(total_demand_df,"demand")
plotly.offline.plot(fig);
total_demand_df = total_demand_df[ total_demand_df.demand >= 2000. ]

需要がない顧客を除外

これで773点の問題例が,約300点になった.

print(len(cust_df))
cust_df = remove_zero_total_demand(total_demand_df, cust_df)
print(len(cust_df))
773
299
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, cust_df, dc_df, plnt_df, plnt_prod_df, total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.736955e+08
1 delivey (dc to customer) 3.606422e+08
2 dc fixed -0.000000e+00
3 dc variable 6.524372e+08
4 infeasible penalty 0.000000e+00
fig = px.bar(cost_df, y="cost", x="value", orientation='h')
plotly.offline.plot(fig);
cost_df.value.sum()
1986774926.5981956
fig = show_optimized_network(cust_df, dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);

階層的クラスタリング法で集約

データを読み直して,地点間の移動時間を計算した後で,クラスタリングを行いデータを集約する.

最適化後に,非集約し,本当の配送費用を計算する.

まずは,階層的クラスタリング法を適用する.これは,道路距離と時間を用いる.

weight_of_prod ={p:w for (p,w) in zip(prod_df.name, prod_df.weight) } #prepare weight dic.
weight_of_cust = defaultdict(int)
for row in total_demand_df.itertuples():
    weight_of_cust[row.cust] += weight_of_prod[row.prod]*row.demand 
weight = [weight_of_cust[i] for i in weight_of_cust] 
durations, distances, node_df = compute_durations(cust_df, None)
fig = elbow_method(cust_df, weight, n_lb = 3, n_ub = 60, repetitions=1, method="hierarchical", durations=durations)
plotly.offline.plot(fig);
X, Y, partition, cost = hierarchical_clusterning(cust_df, weight, durations, num_of_facilities = 50, linkage="complete")
print(cost)
382277301752.6439
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
aggregated_trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               aggregated_trans_df, dc_num= (5,13), single_sourcing=False, max_cpu = 100)
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
cost_df
cost value
0 transportation (plant to dc) 9.761203e+08
1 delivey (dc to customer) 2.418022e+08
2 dc fixed -0.000000e+00
3 dc variable 6.505497e+08
4 infeasible penalty 0.000000e+00

上の費用のうち,顧客への配送費用は,集約した点に対するものなので,本来の費用ではない.本来の費用を再計算する.

from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    dc_name = from_dc[row.name]
    #print(dc_name, row.name)
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
total_del
436088454.2303338

配送費用は,クラスタリングした場合より大きくなり,総費用は以下のように増加する. これは,需要が2000未満のものを削除して最適化した場合の1.4%増であり,良い近似になっていると考えられる.

total = cost_df.value[0] + cost_df.value[3] + total_del
print(total, total/1986774926)
2020658919.3316379 1.0170547719765404

k-メディアン法で集約

k-median法で集約する k-メディアン法も道路距離と移動時間をもとに計算を行う.同じ座標の地点があると,何も割り当てられないクラスターができることがあるので,多少多めに(この場合は1つ多く)施設数を入れておく.

durations, distances, node_df = compute_durations(cust_df, None)
X, Y, partition, best_ub, lb_list, ub_list, phi_list = solve_k_median(cust_df, weight, durations, num_of_facilities=51, 
                                 max_iter=10000, max_lr=0.01, moms=(0.85,0.95), convergence=1e-5,  lr_find = True, adam=False)
fig = plot_k_median_lr_find(phi_list, lb_list)
plotly.offline.plot(fig);
#lr_finder => 0.01 , adam => 50000
X, Y, partition, best_ub, lb_list, ub_list, phi_list = solve_k_median(cust_df, weight, durations, num_of_facilities=51, 
                                 max_iter=2000, max_lr=0.05, moms=(0.85,0.95), convergence=1e-5,  lr_find = False, adam=False)
fig = plot_k_median(lb_list, ub_list)
plotly.offline.plot(fig);
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.647053e+08
1 delivey (dc to customer) 2.503247e+08
2 dc fixed -0.000000e+00
3 dc variable 6.269581e+08
4 infeasible penalty -0.000000e+00
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
aggregated_cust_df.head()
name lat lon customers demand
0 cust0 34.577843 135.438274 [51, 112, 125, 151, 152, 206, 256, 265, 266, 267, 270, 285] 8.997956e+06
1 cust1 41.826084 140.725339 [1] 1.824079e+06
2 cust2 36.031805 139.630817 [2, 15, 64, 81, 85, 129, 238, 293] 4.023094e+06
3 cust3 34.674853 135.634251 [8, 35, 82, 86, 103, 104, 120, 150, 259, 264] 8.146061e+06
4 cust4 35.982864 139.860838 [11, 68, 77, 154, 174, 182, 260, 275, 287] 6.499158e+06
try:
    cust_df.reset_index(inplace=True)
except:
    pass
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    try:
        dc_name = from_dc[row.name]
    except:
        continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
total_del
429417810.7833409
cost_df.value[0] + cost_df.value[3] + total_del
2021081255.6539354
2005167825/1986774926
1.0092576661600168

k-means法で集約

k-means法は重心法ともよばれ,座標をもとに平面上の適当な地点に施設を配置する.

weight_of_prod ={p:w for (p,w) in zip(prod_df.name, prod_df.weight) } #prepare weight dic.
weight_of_cust = defaultdict(int)
for row in total_demand_df.itertuples():
    weight_of_cust[row.cust] += weight_of_prod[row.prod]*row.demand 
weight = [weight_of_cust[i] for i in weight_of_cust] 
durations, distances, node_df = compute_durations(cust_df, None)
durations, distances, node_df = compute_durations(cust_df, None)
X, Y, partition, cost = kmeans(cust_df, weight, num_of_facilities =50)
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.935685e+08
1 delivey (dc to customer) 2.343569e+08
2 dc fixed -0.000000e+00
3 dc variable 6.559424e+08
4 infeasible penalty -0.000000e+00
#cust_df.reset_index(inplace=True)
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    #try:
    dc_name = from_dc[row.name]
    #except:
    #    print("error")
    #    continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
        else:
            print(i,j,"error")
#total_del
cost_df.value[0] + cost_df.value[3] + total_del
304 413 error
304 420 error
304 429 error
2022563380.9275873

道路距離が非常に大きい地点間で配送を行っている. これは,離島(対馬)への道路距離が計算できないためである.

cost_df.value[0] + cost_df.value[3] + total_del
2022563380.9275873
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);

反復 Weiszfeld法で集約

k-means法が施設と顧客の距離の(重み付き)自乗和の最小化であるのに対して, Weiszfeld法は(重み付き)直線距離の和を最小化する.

durations, distances, node_df = compute_durations(cust_df, None)
%%time
X, Y, partition, cost = repeated_weiszfeld(cust_df, weight, 
            num_of_facilities = 50, epsilon=0.0001, max_iter = 30, numpy=True, seed=2)
print(cost)
2462506476.093891
CPU times: user 2min 38s, sys: 83.3 ms, total: 2min 38s
Wall time: 2min 38s
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.902503e+08
1 delivey (dc to customer) 2.799122e+08
2 dc fixed -0.000000e+00
3 dc variable 6.400399e+08
4 infeasible penalty -0.000000e+00
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
cust_df.reset_index(inplace=True)
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    try:
        dc_name = from_dc[row.name]
    except:
        print("error")
        continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
        else:
            print(i,j,"error")
#total_del
cost_df.value[0] + cost_df.value[3] + total_del
122 413 error
122 420 error
122 429 error
1992036715.0329504

k-means法と同様に,道路距離が非常に大きい地点間で配送を行っている. これは,離島(対馬)への道路距離が計算できないためである.