SCMOPTのケーススタディ 2

料金表

2021(2024)年から施行された標準料金を読み込み,pickleに保管しておく.

df.tail()
km area 2 4 10 20
215 180 沖縄 38480 44990 59890 77070
216 190 沖縄 40010 46770 62270 80170
217 200 沖縄 41540 48540 64660 83280
218 210 沖縄 3050 3530 4700 6110
219 220 沖縄 7610 8810 11740 15270
def simple_cycles2(G, limit):
    subG = type(G)(G.edges())
    sccs = list(nx.strongly_connected_components(subG))
    while sccs:
        scc = sccs.pop()
        startnode = scc.pop()
        path = [startnode]
        blocked = set()
        blocked.add(startnode)
        stack = [(startnode, list(subG[startnode]))]

        while stack:
            thisnode, nbrs = stack[-1]

            if nbrs and len(path) < limit:
                nextnode = nbrs.pop()
                if nextnode == startnode:
                    yield path[:]
                elif nextnode not in blocked:
                    path.append(nextnode)
                    stack.append((nextnode, list(subG[nextnode])))
                    blocked.add(nextnode)
                    continue
            if not nbrs or len(path) >= limit:
                blocked.remove(thisnode)
                stack.pop()
                path.pop()
        subG.remove_node(startnode)
        H = subG.subgraph(scc)
        sccs.extend(list(nx.strongly_connected_components(H)))
dem = od_df.SHASHU_SAIDAISEKISAIRYO_CD_1.astype(int, errors="ignore")
dem.hist()
<AxesSubplot:>
%matplotlib inline
#地点名も出すようにする!
data =[go.Scattermapbox(
        lat=lat_head,
        lon=lon_head,
        marker = {'size': num},
        hoverinfo='text',
        hovertext= head, 
        text= head,
        mode="markers",
        name="車庫"
    )]
data.append(    
    go.Scattermapbox(
        lat=lat_switch,
        lon=lon_switch,
        marker = {'size': 5},
        hoverinfo='text',
        text= switch,
        mode="markers",
        name="中継地点"
    )
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig);  

ロジスティックネットワーク設計

ロジスティックネットワーク設計問題を例として,データの前処理やクラスタリングの方法による違いを調べる.

まずはデータを読み込む.列名はSCMOPTにあわせて変えておく.

cust_df = pd.read_csv(folder+ "Cust.csv")
cust_df.columns= ["name","lat","lon"]
cust_df["lat"] = cust_df["lat"].astype(float)
cust_df["lon"] = cust_df["lon"].astype(float)
#cust_df.head()
fig = plot_cust(cust_df)
plotly.offline.plot(fig);
total_demand_df = pd.read_csv(folder+ "Cust-Prod.csv")
total_demand_df.columns=["cust","prod","demand"]
#total_demand_df.head()
dc_df = pd.read_csv(folder+ "DC.csv")
dc_df.columns=["name","fc","vc","lb","ub","lat","lon"]
dc_df.lb = 0.
#dc_df.head()
plnt_df = pd.read_csv(folder+ "Plnt.csv")
plnt_df.columns=["name","lat","lon"]
#plnt_df.head()
prod_df = pd.read_csv(folder+ "Prod.csv")
prod_df.columns=["name","weight"]
#prod_df
plnt_prod_df = pd.read_csv(folder+"Plnt-Prod.csv")
plnt_prod_df.columns=["plnt","prod","ub"]
#plnt_prod_df.head()

同一座標の顧客

同じ緯度・経度をもつ地点が多数存在する.これは,K-d木を用いると容易に確認できる. ここでは,そのままのデータで解くが,k-メディアン法は,同じ地点があると2つともクラスターの中心(メディアン)として選択してしまうので,注意を要する.

df = enumerate_near_points(cust_df, 0.0001)
#df

地点間の道路距離と移動時間を計算

durations, distances, node_df = compute_durations(cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(cust_df, dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 200. )
# 大圏距離で代用する場合には,以下を生かす
#trans_df, graph, position  = make_network(cust_df, dc_df, plnt_df, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )

需要の分布を確認

需要の分布を確認すると,ほとんどが2000以下なので,除外する. 0の需要だけを除外した場合は,数値のばらつきがあまりにも大きいために,Gurobiでさえ不安定になり,解くことができない(もしくは非常に時間がかかる).

fig = px.histogram(total_demand_df,"demand")
plotly.offline.plot(fig);
total_demand_df = total_demand_df[ total_demand_df.demand >= 2000. ]

需要がない顧客を除外

これで773点の問題例が,約300点になった.

print(len(cust_df))
cust_df = remove_zero_total_demand(total_demand_df, cust_df)
print(len(cust_df))
773
299
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, cust_df, dc_df, plnt_df, plnt_prod_df, total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.736955e+08
1 delivey (dc to customer) 3.606422e+08
2 dc fixed -0.000000e+00
3 dc variable 6.524372e+08
4 infeasible penalty 0.000000e+00
fig = px.bar(cost_df, y="cost", x="value", orientation='h')
plotly.offline.plot(fig);
cost_df.value.sum()
1986774926.5981956
fig = show_optimized_network(cust_df, dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);

階層的クラスタリング法で集約

データを読み直して,地点間の移動時間を計算した後で,クラスタリングを行いデータを集約する.

最適化後に,非集約し,本当の配送費用を計算する.

まずは,階層的クラスタリング法を適用する.これは,道路距離と時間を用いる.

weight_of_prod ={p:w for (p,w) in zip(prod_df.name, prod_df.weight) } #prepare weight dic.
weight_of_cust = defaultdict(int)
for row in total_demand_df.itertuples():
    weight_of_cust[row.cust] += weight_of_prod[row.prod]*row.demand 
weight = [weight_of_cust[i] for i in weight_of_cust] 
durations, distances, node_df = compute_durations(cust_df, None)
fig = elbow_method(cust_df, weight, n_lb = 3, n_ub = 60, repetitions=1, method="hierarchical", durations=durations)
plotly.offline.plot(fig);
X, Y, partition, cost = hierarchical_clusterning(cust_df, weight, durations, num_of_facilities = 50, linkage="complete")
print(cost)
382277301752.6439
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
aggregated_trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               aggregated_trans_df, dc_num= (5,13), single_sourcing=False, max_cpu = 100)
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
cost_df
cost value
0 transportation (plant to dc) 9.761203e+08
1 delivey (dc to customer) 2.418022e+08
2 dc fixed -0.000000e+00
3 dc variable 6.505497e+08
4 infeasible penalty 0.000000e+00

上の費用のうち,顧客への配送費用は,集約した点に対するものなので,本来の費用ではない.本来の費用を再計算する.

from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    dc_name = from_dc[row.name]
    #print(dc_name, row.name)
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
total_del
436088454.2303338

配送費用は,クラスタリングした場合より大きくなり,総費用は以下のように増加する. これは,需要が2000未満のものを削除して最適化した場合の1.4%増であり,良い近似になっていると考えられる.

total = cost_df.value[0] + cost_df.value[3] + total_del
print(total, total/1986774926)
2020658919.3316379 1.0170547719765404

k-メディアン法で集約

k-median法で集約する k-メディアン法も道路距離と移動時間をもとに計算を行う.同じ座標の地点があると,何も割り当てられないクラスターができることがあるので,多少多めに(この場合は1つ多く)施設数を入れておく.

durations, distances, node_df = compute_durations(cust_df, None)
X, Y, partition, best_ub, lb_list, ub_list, phi_list = solve_k_median(cust_df, weight, durations, num_of_facilities=51, 
                                 max_iter=10000, max_lr=0.01, moms=(0.85,0.95), convergence=1e-5,  lr_find = True, adam=False)
fig = plot_k_median_lr_find(phi_list, lb_list)
plotly.offline.plot(fig);
#lr_finder => 0.01 , adam => 50000
X, Y, partition, best_ub, lb_list, ub_list, phi_list = solve_k_median(cust_df, weight, durations, num_of_facilities=51, 
                                 max_iter=2000, max_lr=0.05, moms=(0.85,0.95), convergence=1e-5,  lr_find = False, adam=False)
fig = plot_k_median(lb_list, ub_list)
plotly.offline.plot(fig);
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.647053e+08
1 delivey (dc to customer) 2.503247e+08
2 dc fixed -0.000000e+00
3 dc variable 6.269581e+08
4 infeasible penalty -0.000000e+00
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
aggregated_cust_df.head()
name lat lon customers demand
0 cust0 34.577843 135.438274 [51, 112, 125, 151, 152, 206, 256, 265, 266, 267, 270, 285] 8.997956e+06
1 cust1 41.826084 140.725339 [1] 1.824079e+06
2 cust2 36.031805 139.630817 [2, 15, 64, 81, 85, 129, 238, 293] 4.023094e+06
3 cust3 34.674853 135.634251 [8, 35, 82, 86, 103, 104, 120, 150, 259, 264] 8.146061e+06
4 cust4 35.982864 139.860838 [11, 68, 77, 154, 174, 182, 260, 275, 287] 6.499158e+06
try:
    cust_df.reset_index(inplace=True)
except:
    pass
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    try:
        dc_name = from_dc[row.name]
    except:
        continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
total_del
429417810.7833409
cost_df.value[0] + cost_df.value[3] + total_del
2021081255.6539354
2005167825/1986774926
1.0092576661600168

k-means法で集約

k-means法は重心法ともよばれ,座標をもとに平面上の適当な地点に施設を配置する.

weight_of_prod ={p:w for (p,w) in zip(prod_df.name, prod_df.weight) } #prepare weight dic.
weight_of_cust = defaultdict(int)
for row in total_demand_df.itertuples():
    weight_of_cust[row.cust] += weight_of_prod[row.prod]*row.demand 
weight = [weight_of_cust[i] for i in weight_of_cust] 
durations, distances, node_df = compute_durations(cust_df, None)
durations, distances, node_df = compute_durations(cust_df, None)
X, Y, partition, cost = kmeans(cust_df, weight, num_of_facilities =50)
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.935685e+08
1 delivey (dc to customer) 2.343569e+08
2 dc fixed -0.000000e+00
3 dc variable 6.559424e+08
4 infeasible penalty -0.000000e+00
#cust_df.reset_index(inplace=True)
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    #try:
    dc_name = from_dc[row.name]
    #except:
    #    print("error")
    #    continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
        else:
            print(i,j,"error")
#total_del
cost_df.value[0] + cost_df.value[3] + total_del
304 413 error
304 420 error
304 429 error
2022563380.9275873

道路距離が非常に大きい地点間で配送を行っている. これは,離島(対馬)への道路距離が計算できないためである.

cost_df.value[0] + cost_df.value[3] + total_del
2022563380.9275873
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);

反復 Weiszfeld法で集約

k-means法が施設と顧客の距離の(重み付き)自乗和の最小化であるのに対して, Weiszfeld法は(重み付き)直線距離の和を最小化する.

durations, distances, node_df = compute_durations(cust_df, None)
%%time
X, Y, partition, cost = repeated_weiszfeld(cust_df, weight, 
            num_of_facilities = 50, epsilon=0.0001, max_iter = 30, numpy=True, seed=2)
print(cost)
2462506476.093891
CPU times: user 2min 38s, sys: 83.3 ms, total: 2min 38s
Wall time: 2min 38s
fig = show_optimized_continuous_network(cust_df, X, Y, partition, weight= weight)
plotly.offline.plot(fig);
aggregated_cust_df, aggregated_total_demand_df =  make_aggregated_df(cust_df, None, total_demand_df, X, Y, partition, weight)
aggregated_dc_df = generate_dc(aggregated_cust_df, aggregated_total_demand_df, prod_df, num_dc=10, lb_ratio=0.0, ub_ratio=10.3,vc_bounds=(19,21),fc_bounds=(0,1))
durations, distances, node_df = compute_durations(aggregated_cust_df, plnt_df)
trans_df, graph, position  = make_network_using_road(aggregated_cust_df, aggregated_dc_df, plnt_df, durations, distances, plnt_dc_threshold = 10000., dc_cust_threshold = 500. )
flow_df, dc_df, cost_df, violation_df, status = solve_lnd(prod_df, aggregated_cust_df,aggregated_dc_df, plnt_df, plnt_prod_df, aggregated_total_demand_df, 
                               trans_df, dc_num= (5,13), single_sourcing=True, max_cpu = 100)
cost_df
cost value
0 transportation (plant to dc) 9.902503e+08
1 delivey (dc to customer) 2.799122e+08
2 dc fixed -0.000000e+00
3 dc variable 6.400399e+08
4 infeasible penalty -0.000000e+00
fig = show_optimized_network(aggregated_cust_df, aggregated_dc_df, plnt_df, prod_df, flow_df, position)
plotly.offline.plot(fig);
cust_df.reset_index(inplace=True)
from_dc = {} #集約した顧客に入る倉庫の名称を入れた辞書(集約した顧客名をキーとする)
for row in flow_df.itertuples():
    if row.to_node[:5]=="Cust_":
        from_dc[row.to_node[5:]] = row.from_node[3:]
#開設した集約倉庫に最も近い顧客を探索
from scipy.spatial import KDTree 
points = [(i,j)  for (i,j)  in zip(cust_df.lat, cust_df.lon)]
tree = KDTree(points)
dc_cust_dic={} #集約した倉庫の名前を入れると,それに最も近い,もとの顧客indexを返す辞書
for row in dc_df[ dc_df.open_close==1 ].itertuples():
    dis, near = tree.query( (row.lat, row.lon) )
    dc_cust_dic[row.name] = cust_df.loc[int(near),"index"]
durations, distances, node_df = compute_durations(cust_df, None)
#dc_set = set(from_dc.values())    #from_dc: 各集約顧客が,どのDCから来ているかを表す辞書
#agg_cust_set = set(from_dc.keys())
dc_per_dis = 10./4000
dc_per_time = 8000./4000
total_del = 0.
for row in aggregated_cust_df.itertuples():
    try:
        dc_name = from_dc[row.name]
    except:
        print("error")
        continue
    i = dc_cust_dic[dc_name]
    for c in row.customers:
        j = cust_df.loc[c,"index"]
        if distances[i][j] < 9999999:
            dis = distances[i][j]/1000.
            time = durations[i][j]/3600.
            cost_per_kg = dis*dc_per_dis + time*dc_per_time
            total_del += weight[j]*cost_per_kg 
        else:
            print(i,j,"error")
#total_del
cost_df.value[0] + cost_df.value[3] + total_del
122 413 error
122 420 error
122 429 error
1992036715.0329504

k-means法と同様に,道路距離が非常に大きい地点間で配送を行っている. これは,離島(対馬)への道路距離が計算できないためである.