'''Module for generating node distributions.'''
from trafpy.generator.src.dists import plot_dists
from trafpy.generator.src.dists import val_dists
from trafpy.generator.src import tools
import sys
import numpy as np
import time
import json
import copy
def assign_probs_to_matrix(eps, probs, matrix=None):
    '''Assigns pair probabilities to a symmetric 2D node matrix.

    Each probability is written to both ``matrix[src_idx, dst_idx]`` and
    ``matrix[dst_idx, src_idx]``, so the resulting entries are mirrored about
    the diagonal.

    Args:
        eps (list): Network endpoints used to index the matrix.
        probs (dict or array-like): Either a dict whose keys are JSON-encoded
            ``[src, dst]`` pairs and whose values are the pair probabilities,
            or a sequence of pair probabilities.
            N.B. if probs is a sequence, it is assumed to be given in the order
            in which pairs are visited when looping
            ``for src in eps for dst in eps`` and skipping pairs where
            src == dst or where the src index is greater than the dst index.
        matrix (numpy array, optional): Matrix to write the probabilities
            into (modified in place). If None, a new zero matrix of shape
            (len(eps), len(eps)) is created.

    Returns:
        numpy array: The matrix holding the assigned probabilities.
    '''
    if matrix is None:
        matrix = np.zeros((len(eps), len(eps)))
    _, _, node_to_index, _ = tools.get_network_params(eps)

    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict are also treated as pair->prob mappings
    if isinstance(probs, dict):
        for pair, prob in probs.items():
            pair = json.loads(pair)
            src, dst = pair[0], pair[1]
            src_idx, dst_idx = node_to_index[src], node_to_index[dst]
            matrix[src_idx, dst_idx] = prob
            matrix[dst_idx, src_idx] = prob
    else:
        prob_iter = np.nditer(np.array(probs))
        for src in eps:
            for dst in eps:
                if src == dst:
                    continue
                if node_to_index[src] > node_to_index[dst]:
                    # making symmetric so skip this side of diagonal
                    continue
                prob = next(prob_iter)
                src_idx = node_to_index[src]
                dst_idx = node_to_index[dst]
                matrix[src_idx, dst_idx] = prob
                matrix[dst_idx, src_idx] = prob

    return matrix
def assign_matrix_to_probs(eps, node_dist):
    '''Converts a 2D node distribution matrix into a pair -> probability dict.

    An entry is created for every ordered combination of two different
    endpoints in eps. Each key is the JSON-encoded list of the pair's matrix
    indices ``[src_idx, dst_idx]`` and each value is the matrix element at
    those indices.

    Args:
        eps (list): Network endpoints used to index the matrix.
        node_dist (array-like): 2D matrix of pair probabilities.

    Returns:
        dict: Maps JSON-encoded ``[src_idx, dst_idx]`` strings to the
        corresponding matrix values.
    '''
    _, _, node_to_index, _ = tools.get_network_params(eps)
    dist_matrix = np.asarray(node_dist)

    # lazily walk every ordered (src, dst) combination, skipping the diagonal
    index_pairs = (
        (node_to_index[src], node_to_index[dst])
        for src in eps
        for dst in eps
        if src != dst
    )
    return {
        json.dumps([row, col]): dist_matrix[row, col]
        for row, col in index_pairs
    }
def gen_multimodal_node_dist(eps,
                             skewed_nodes=[],
                             skewed_node_probs=[],
                             num_skewed_nodes=None,
                             rack_prob_config=None,
                             path_to_save=None,
                             plot_fig=False,
                             show_fig=False,
                             print_data=False):
    '''Generates a multimodal node distribution.

    Generates a multimodal node demand distribution i.e. certain nodes
    have a certain specified probability of being chosen. If no
    skewed nodes given, randomly selects node(s) to skew. If no
    skew node probabilities given, randomly selects each skewed node's
    probability such that the skewed nodes together account for between
    0.5 and 0.8 of the total probability. If no num skewed nodes given (and
    no skewed nodes given), randomly chooses the number of nodes to skew.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations
        skewed_nodes (list): Node(s) whose probability of being
            chosen you want to skew/specify
        skewed_node_probs (list): Probabilit(y)(ies) of node(s)
            being chosen/specified skews
        num_skewed_nodes (int): Number of nodes to skew. If None, is set to
            len(skewed_nodes); if that is 0, will gen a number between 10%
            and 30% of the total number of nodes in network (or 1-2 nodes
            for small networks)
        rack_prob_config (dict): Network endpoints/servers are often grouped into
            physically local clusters or `racks'. Different networks may have
            different levels of inter- (between) and intra- (within) rack communication.
            If rack_prob_config is left as None, will assume that server-server
            src-dst requests are independent of which rack they might be in.
            If specified, dict should have a `racks_dict' key, whose value is a dict
            with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value
            for each key is a list of the endpoints in the respective rack
            (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack'
            key whose value is a float (e.g. 0.9), setting the probability
            that a chosen src endpoint has a destination which is outside of its rack.
            If you want to e.g. designate an entire rack as a 'hot rack' (many
            traffic requests occur from this rack), would specify skewed_nodes to
            contain the list of servers in this rack and configure rack_prob_config
            appropriately.
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
        plot_fig (bool): Whether or not to plot fig. If True, will return fig.
        show_fig (bool): Whether or not to plot and show fig. If True, will
            return and display fig.
        print_data (bool): Whether or not to print extra information about the
            generated data.

    Returns:
        numpy array or tuple: If neither plot_fig nor show_fig is set, only
        **node_dist** is returned. Otherwise a tuple containing:

        - **node_dist** (*numpy array*): 2D matrix array of source-destination pair
          probabilities of being chosen.
        - **fig** (*matplotlib.figure.Figure*): Node distributions
          plotted as a 2D matrix.

    Raises:
        AssertionError: If the generated matrix does not sum to 1 (after
            rounding to 2 decimal places).
    '''
    # initialise graph params
    num_nodes, _, _, index_to_node = tools.get_network_params(eps)
    _, pair_to_index = get_network_pair_mapper(eps)
    node_dist = np.zeros((num_nodes, num_nodes))

    if num_skewed_nodes is None:
        num_skewed_nodes = len(skewed_nodes)

    if len(skewed_nodes) == 0:
        if num_skewed_nodes == 0:
            # randomly choose number of nodes to skew
            min_skews = int(0.1 * num_nodes)
            max_skews = int(0.3 * num_nodes)
            if min_skews < 1:
                min_skews = 1
                max_skews = 2
            num_skewed_nodes = np.random.randint(min_skews, max_skews + 1)
        # randomly choose the node(s) to skew. Sample positions and index into
        # eps rather than sampling eps directly: np.random.choice would hand
        # back numpy scalars, and e.g. numpy integers cannot be serialised by
        # json.dumps when building the pair keys below.
        chosen_indices = np.random.choice(len(eps), size=num_skewed_nodes)
        skewed_nodes = [eps[idx] for idx in chosen_indices]

    if len(skewed_node_probs) == 0:
        # randomly choose skew between range
        min_prob = 0.5 / num_skewed_nodes
        max_prob = 0.8 / num_skewed_nodes
        skewed_node_probs = list(np.random.uniform(min_prob,
                                                   max_prob,
                                                   size=num_skewed_nodes))

    skewed_node_probs_dict = dict(zip(skewed_nodes, skewed_node_probs))
    if print_data:
        print('Num skewed nodes: {}'.format(num_skewed_nodes))
        print('Chosen for skew:\n{}'.format(skewed_node_probs_dict.keys()))
        print('Chosen probs:\n{}'.format(skewed_node_probs_dict.values()))

    # find prob of each skewed node pair being chosen
    pairs_per_node = num_nodes - 1
    probs_per_skewed_pair = {node: prob / pairs_per_node
                             for node, prob in skewed_node_probs_dict.items()}

    # update prob pair chosen for each pair with a skewed node
    prob_pair_chosen = {pair: 0 for pair in pair_to_index}
    # dict (not set) so that the remaining pairs keep the pair mapper's order
    unskewed_pairs = {pair: 0 for pair in pair_to_index}
    for node, pair_prob in probs_per_skewed_pair.items():
        for other_idx in range(num_nodes):
            other_node = index_to_node[other_idx]
            if other_node != node:
                pair = json.dumps([node, other_node])
                if pair not in prob_pair_chosen:
                    # only one orientation of each pair is stored, so use
                    # the switched src-dst key
                    pair = json.dumps([other_node, node])
                # allocate 2x (both sides of the diagonal) so divide by 2
                prob_pair_chosen[pair] += pair_prob / 2
                # register pair as no longer being unskewed (may already
                # have been removed by another skewed node)
                unskewed_pairs.pop(pair, None)
    total_skew_prob = np.sum(list(prob_pair_chosen.values()))

    # assign prob pair chosen to any pairs w/o skewed nodes
    if total_skew_prob < 0.5:
        num_remaining_pairs = list(prob_pair_chosen.values()).count(0)
        prob_dist = np.ones((num_remaining_pairs)) / num_remaining_pairs
        num_experiments = num_remaining_pairs * 100
        counter_array = np.random.multinomial(num_experiments,
                                              prob_dist,
                                              size=1)[0]
        # normalise the counts so they sum to 0.5, then scale them to fill
        # whatever part of the 0.5 budget the skewed pairs did not use
        counter_array_prob_dist = counter_array / (num_experiments * 2)
        counter_array_prob_dist = ((0.5 - total_skew_prob) / 0.5) * counter_array_prob_dist
        iterable = np.nditer(counter_array_prob_dist)
        for pair in unskewed_pairs.keys():
            prob_pair_chosen[pair] = next(iterable)

    if print_data:
        print('Prob pair chosen:\n{}'.format(prob_pair_chosen))

    # assign probabilites to normalised demand matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=list(prob_pair_chosen.values()),
                                       matrix=node_dist)

    if rack_prob_config is not None:
        # adjust node prob dist to account for rack prob config
        node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                                          eps,
                                                          node_dist,
                                                          print_data=print_data)

    matrix_sum = np.round(np.sum(node_dist), 2)
    assert matrix_sum == 1, \
        'matrix must sum to 1, but is {}'.format(matrix_sum)

    if print_data:
        print('Normalised matrix:\n{}'.format(node_dist))
        print('Normalised matrix sum: {}'.format(matrix_sum))

    if path_to_save is not None:
        tools.pickle_data(path_to_save, node_dist)

    if plot_fig or show_fig:
        fig = plot_dists.plot_node_dist(node_dist=node_dist,
                                        eps=eps,
                                        show_fig=show_fig)
        return node_dist, fig
    else:
        return node_dist
# def gen_multimodal_node_dist(eps,
# skewed_nodes=[],
# skewed_node_probs=[],
# num_skewed_nodes=None,
# rack_prob_config=None,
# path_to_save=None,
# plot_fig=False,
# show_fig=False,
# print_data=False):
# '''Generates a multimodal node distribution.
# Generates a multimodal node demand distribution i.e. certain nodes
# have a certain specified probability of being chosen. If no
# skewed nodes given, randomly selects random no. node(s) to skew. If no
# skew node probabilities given, random selects probability with which
# to skew the node between 0.5 and 0.8. If no num skewed nodes given,
# randomly chooses number of nodes to skew.
# Args:
# eps (list): List of network node endpoints that can act as sources
# & destinations
# skewed_nodes (list): Node(s) to whose probability of being
# chosen you want to skew/specify
# skewed_node_probs (list): Probabilit(y)(ies) of node(s)
# being chosen/specified skews
# num_skewed_nodes (int): Number of nodes to skew. If None, will gen
# a number between 10% and 30% of the total number of nodes in network
# rack_prob_config (dict): Network endpoints/servers are often grouped into
# physically local clusters or `racks'. Different networks may have
# different levels of inter- (between) and intra- (within) rack communication.
# If rack_prob_config is left as None, will assume that server-server
# srs-dst requests are independent of which rack they might be in.
# If specified, dict should have a `racks_dict' key, whose value is a dict
# with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value
# for each key is a list of the endpoints in the respective rack
# (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack'
# key whose value is a float (e.g. 0.9), setting the probability
# that a chosen src endpoint has a destination which is outside of its rack.
# If you want to e.g. designate an entire rack as a 'hot rack' (many
# traffic requests occur from this rack), would specify skewed_nodes to
# contain the list of servers in this rack and configure rack_prob_config
# appropriately.
# path_to_save (str): Path to directory (with file name included) in which
# to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
# plot_fig (bool): Whether or not to plot fig. If True, will return fig.
# show_fig (bool): Whether or not to plot and show fig. If True, will
# return and display fig.
# print_data (bool): Whether or not to print extra information about the
# generated data.
# Returns:
# tuple: Tuple containing:
# - **node_dist** (*numpy array*): 2D matrix array of souce-destination pair
# probabilities of being chosen.
# - **fig** (*matplotlib.figure.Figure, optional*): Node distributions
# plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True.
# '''
# # initialise graph params
# num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
# node_dist = np.zeros((num_nodes, num_nodes))
# if num_skewed_nodes is None:
# num_skewed_nodes = len(skewed_nodes)
# if len(skewed_nodes) == 0:
# if num_skewed_nodes == 0:
# # randomly choose number of nodes to skew
# min_skews = int(0.1*num_nodes)
# max_skews = int(0.3*num_nodes)
# if min_skews < 1:
# min_skews = 1
# max_skews = 2
# num_skewed_nodes = np.random.randint(min_skews, max_skews+1)
# # randomly choose a node
# skewed_nodes = list(np.random.choice(eps, size=num_skewed_nodes))
# if len(skewed_node_probs) == 0:
# # randomly choose skew between range
# min_prob = 0.5/num_skewed_nodes
# max_prob = 0.8/num_skewed_nodes
# skewed_node_probs = np.random.uniform(min_prob,
# max_prob,
# size=num_skewed_nodes)
# skewed_node_probs = list(np.round(skewed_node_probs,2))
# if print_data:
# print('Num skewed nodes: {}'.format(num_skewed_nodes))
# print('Chosen for skew:\n{}'.format(skewed_nodes))
# print('Chosen probs:\n{}'.format(skewed_node_probs))
# # get indices of node pairs to skew
# pairs_per_node = num_nodes - 1
# skewed_node_pair_indices = np.zeros((pairs_per_node, num_skewed_nodes))
# for skewed_node_iter in range(num_skewed_nodes):
# skewed_node_pair_iter = 0
# matrix_iter = 0
# for src in eps:
# for dst in eps:
# if src == dst:
# continue
# elif src > dst:
# continue
# else:
# skewed_node = skewed_nodes[skewed_node_iter]
# if skewed_node == src or skewed_node == dst:
# skewed_node_pair_indices[skewed_node_pair_iter,skewed_node_iter] = matrix_iter
# skewed_node_pair_iter+=1
# matrix_iter += 1
# if print_data:
# print('Skewed node pair indices:\n{}'.format(skewed_node_pair_indices))
# # find prob of each skewed node pair being chosen
# probs_per_skewed_pair = np.zeros(num_skewed_nodes)
# for node in range(num_skewed_nodes):
# probs_per_skewed_pair[node] = skewed_node_probs[node] / pairs_per_node
# # update prob pair chosen for each pair with a skewed node
# prob_pair_chosen = np.zeros(num_pairs)
# iter = np.nditer(skewed_node_pair_indices)
# for skewed_node_iter in range(num_skewed_nodes):
# for skewed_node_pair_iter in range(pairs_per_node):
# for pair in range(num_pairs):
# if pair == skewed_node_pair_indices[skewed_node_pair_iter,skewed_node_iter]:
# # add to skew of node
# prob_pair_chosen[pair] += probs_per_skewed_pair[skewed_node_iter]
# else:
# continue
# # will allocate twice
# prob_pair_chosen = prob_pair_chosen / 2
# total_skew_prob = np.sum(prob_pair_chosen)
# # assign prob pair chosen to any pairs w/o skewed nodes
# if total_skew_prob < 0.5:
# remaining_pairs = np.count_nonzero(prob_pair_chosen == 0)
# prob_dist = np.ones((remaining_pairs))/((remaining_pairs))
# counter_array = np.random.multinomial(500,
# prob_dist,
# size=1)[0]
# counter_array_prob_dist = (counter_array/1000)
# counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist
# iter = np.nditer(counter_array_prob_dist)
# for pair in range(len(prob_pair_chosen)):
# if prob_pair_chosen[pair] == 0:
# prob_pair_chosen[pair] = next(iter)
# if print_data:
# print('Prob pair chosen:\n{}'.format(prob_pair_chosen))
# # assign probabilites to normalised demand matrix
# node_dist = assign_probs_to_matrix(eps=eps,
# probs=prob_pair_chosen,
# matrix=node_dist)
# if rack_prob_config is not None:
# # adjust node prob dist to account for rack prob config
# node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
# eps,
# node_dist,
# print_data=print_data)
# matrix_sum = np.round(np.sum(node_dist),2)
# assert matrix_sum == 1, \
# 'matrix must sum to 1, but is {}'.format(matrix_sum)
# if print_data:
# print('Normalised matrix:\n{}'.format(node_dist))
# print('Normalised matrix sum: {}'.format(matrix_sum))
# if path_to_save is not None:
# tools.pickle_data(path, node_dist)
# if plot_fig or show_fig:
# fig = plot_dists.plot_node_dist(node_dist=node_dist,
# eps=eps,
# show_fig=show_fig)
# return node_dist, fig
# else:
# return node_dist
def get_suitable_destination_node_for_rack_config(sn, node_dist, eps, ep_to_rack, rack_to_ep, inter_rack):
    '''Given source node, finds destination node given inter and intra rack config.

    Args:
        sn: Source node for which a destination is required.
        node_dist (numpy array): Node distribution passed on to
            gen_demand_nodes when sampling the destination.
        eps (list): Network endpoints (not used by this function; kept for
            interface compatibility).
        ep_to_rack (dict): Maps each endpoint to the label of its rack.
        rack_to_ep (dict): Maps each rack label to the list of its endpoints.
        inter_rack (bool): If True, the destination is sampled from endpoints
            in racks other than the source's rack; otherwise it is sampled
            from endpoints in the source's own rack.

    Returns:
        The sampled destination node, which is never equal to sn.

    Raises:
        ValueError: If there is no candidate destination other than sn (e.g.
            an intra-rack request for a rack whose only endpoint is sn, or an
            inter-rack request when there is only one rack).
    '''
    sn_rack = ep_to_rack[sn]

    # get list of suitable destination nodes
    if inter_rack:
        # inter rack destination nodes for this source node
        candidate_racks = [rack for rack in rack_to_ep if rack != sn_rack]
    else:
        # intra rack destination nodes for this source node
        candidate_racks = [rack for rack in rack_to_ep if rack == sn_rack]
    dn_eps_flat = [ep for rack in candidate_racks for ep in rack_to_ep[rack]]

    # Without at least one candidate that differs from sn, the resampling
    # loop below could never find a valid destination, so fail fast instead.
    if all(ep == sn for ep in dn_eps_flat):
        raise ValueError(
            'No valid {}-rack destination available for source node {}.'.format(
                'inter' if inter_rack else 'intra', sn))

    # find suitable destination for source node (resample until it differs
    # from the source)
    dn = copy.deepcopy(sn)
    while sn == dn:
        dn = gen_demand_nodes(eps=dn_eps_flat,
                              node_dist=node_dist,
                              size=1,
                              axis=1,
                              check_sum_valid=False)[0]

    return dn
def get_inter_intra_rack_pair_prob_dicts(pair_prob_dict, ep_to_rack_dict):
    '''Splits a pair probability dict into inter-rack and intra-rack pairs.

    Args:
        pair_prob_dict (dict): Maps JSON-encoded ``[src, dst]`` pairs to
            their probabilities.
        ep_to_rack_dict (dict): Maps each endpoint to the label of its rack.

    Returns:
        tuple: Tuple containing:

        - **inter_rack_pair_prob_dict** (*dict*): Entries of pair_prob_dict
          whose src and dst are in different racks.
        - **intra_rack_pair_prob_dict** (*dict*): Entries of pair_prob_dict
          whose src and dst are in the same rack.
    '''
    inter_rack_pair_prob_dict = {}
    intra_rack_pair_prob_dict = {}
    for pair, prob in pair_prob_dict.items():
        pair_loaded = json.loads(pair)
        src, dst = pair_loaded[0], pair_loaded[1]
        if ep_to_rack_dict[src] == ep_to_rack_dict[dst]:
            # intra-rack
            intra_rack_pair_prob_dict[pair] = prob
        else:
            # inter-rack
            inter_rack_pair_prob_dict[pair] = prob

    return inter_rack_pair_prob_dict, intra_rack_pair_prob_dict
def _shift_pair_probs(pair_prob_dict, shift_per_pair):
    '''Adds shift_per_pair to each probability in pair_prob_dict (in place) without going below 0.'''
    for pair in pair_prob_dict:
        updated_prob = pair_prob_dict[pair] + shift_per_pair
        if updated_prob < 0:
            # cant have negative probs, take away from max prob instead
            highest_prob_pair = max(pair_prob_dict, key=pair_prob_dict.get)
            pair_prob_dict[highest_prob_pair] += shift_per_pair
            if pair_prob_dict[highest_prob_pair] < 0:
                pair_prob_dict[highest_prob_pair] = 0
        else:
            # wont have negative probs, can apply update
            pair_prob_dict[pair] = updated_prob


def adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                          eps,
                                          node_dist,
                                          print_data=False):
    '''Adjusts a node distribution to meet a target inter-/intra-rack split.

    Unlike the other adjust_node_dist_from_multinomial_exp_for_rack_prob_config function,
    this function does not use a multinomial experiment to adjust the prob dist,
    but rather uses a deterministic method of distorting the probabilities from
    the original node distribution such that the required inter-/intra-rack probabilities
    are met.

    Args:
        rack_prob_config (dict): Dict with a `racks_dict' key (dict mapping
            rack labels to lists of the endpoints in each rack) and a
            `prob_inter_rack' key (float probability that a request is
            inter-rack).
        eps (list): List of network node endpoints.
        node_dist (numpy array): 2D matrix of source-destination pair
            probabilities. The adjusted probabilities are written back into
            this matrix.
        print_data (bool): Whether or not to print extra information about the
            adjustment.

    Returns:
        numpy array: The adjusted node distribution matrix.

    Raises:
        ZeroDivisionError: If the distribution contains no inter-rack pairs
            or no intra-rack pairs.
    '''
    # switch racks_dict keys and values; an endpoint listed in more than one
    # rack keeps the first rack it was seen in
    ep_to_rack_dict = {}
    for rack, rack_eps in rack_prob_config['racks_dict'].items():
        for ep in rack_eps:
            ep_to_rack_dict.setdefault(ep, rack)

    pair_prob_dict = get_pair_prob_dict_of_node_dist_matrix(node_dist, eps)
    inter_rack_pair_prob_dict, intra_rack_pair_prob_dict = get_inter_intra_rack_pair_prob_dicts(pair_prob_dict, ep_to_rack_dict)

    # get current inter intra rack probs
    inter_rack_prob = sum(inter_rack_pair_prob_dict.values())
    intra_rack_prob = sum(intra_rack_pair_prob_dict.values())
    if print_data:
        print('inter_rack_prob: {}'.format(inter_rack_prob))
        print('intra_rack_prob: {}'.format(intra_rack_prob))
        print('sum: {}'.format(inter_rack_prob+intra_rack_prob))

    target_inter_rack_prob = rack_prob_config['prob_inter_rack'] / 2  # allocate 2x so divide by 2
    diff_inter_rack_prob = target_inter_rack_prob - inter_rack_prob
    # spread the required change evenly over the inter-rack pairs, and the
    # opposite change evenly over the intra-rack pairs
    diff_per_inter_rack_pair = diff_inter_rack_prob / len(inter_rack_pair_prob_dict)
    diff_per_intra_rack_pair = -(diff_inter_rack_prob / len(intra_rack_pair_prob_dict))
    if print_data:
        print('target_inter_rack_prob: {}'.format(target_inter_rack_prob))
        print('diff_inter_rack_prob: {}'.format(diff_inter_rack_prob))
        print('diff_per_inter_rack_pair: {}'.format(diff_per_inter_rack_pair))
        print('diff_per_intra_rack_pair: {}'.format(diff_per_intra_rack_pair))

    # adjust probs so have desired inter intra rack probs
    _shift_pair_probs(inter_rack_pair_prob_dict, diff_per_inter_rack_pair)
    _shift_pair_probs(intra_rack_pair_prob_dict, diff_per_intra_rack_pair)

    inter_rack_prob = sum(inter_rack_pair_prob_dict.values())
    intra_rack_prob = sum(intra_rack_pair_prob_dict.values())
    if print_data:
        print('inter_rack_prob: {}'.format(inter_rack_prob))
        print('intra_rack_prob: {}'.format(intra_rack_prob))
        print('sum: {}'.format(inter_rack_prob+intra_rack_prob))

    # create new adjusted pair_prob_dict
    adjusted_pair_prob_dict = {}
    adjusted_pair_prob_dict.update(inter_rack_pair_prob_dict)
    adjusted_pair_prob_dict.update(intra_rack_pair_prob_dict)

    # correct any minor errors in python floating point arithmetic
    adjusted_pair_prob_dict = adjust_probability_dict_sum(adjusted_pair_prob_dict, target_sum=0.5)
    if print_data:
        print('adjusted sum: {}'.format(sum(list(adjusted_pair_prob_dict.values()))))

    # assign to create adjusted prob matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=adjusted_pair_prob_dict,
                                       matrix=node_dist)

    return node_dist
def convert_sampled_pairs_into_node_dist(sampled_pairs, eps):
    '''Converts a dict of sampled pairs into a 2D node distribution matrix.

    Args:
        sampled_pairs (dict): Keys are JSON-encoded ``[src, dst]`` pairs and
            values are passed to val_dists.convert_key_occurrences_to_data
            alongside the keys (presumably the number of times each pair was
            sampled -- confirm against val_dists).
        eps (list): List of network node endpoints.

    Returns:
        numpy array: Node distribution matrix built by assign_probs_to_matrix
        from the pairs' probabilities (each halved, since every probability
        is written to both sides of the diagonal).
    '''
    # expand the pair occurrences into data, then turn the data into a prob dist
    pair_data = val_dists.convert_key_occurrences_to_data(list(sampled_pairs.keys()), list(sampled_pairs.values()))
    unique_vals, pmf = val_dists.gen_discrete_prob_dist(pair_data)
    sampled_pair_prob_dist = dict(zip(unique_vals, pmf))

    # insert 0 probabilites for any pairs that were never sampled
    _, num_pairs, _, _ = tools.get_network_params(eps)
    index_to_pair, pair_to_index = get_network_pair_mapper(eps)
    if num_pairs != len(sampled_pair_prob_dist):
        # some pairs were never chosen, find which pair indices were covered
        seen_pair_indices = set()
        for pair in sampled_pair_prob_dist:
            if pair not in pair_to_index:
                # pair was recorded with src and dst switched
                loaded = json.loads(pair)
                pair = json.dumps([loaded[1], loaded[0]])
            seen_pair_indices.add(pair_to_index[pair])
        for index, missing_pair in index_to_pair.items():
            if index not in seen_pair_indices:
                # index is missing, give its pair a 0 prob
                sampled_pair_prob_dist[json.dumps(missing_pair)] = 0

    # convert prob dist into dict whose keys are node dist matrix indices
    matrix_pair_prob_dist = convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(sampled_pair_prob_dist, eps)

    # sorted so that pair probs are in the order expected when assigning to
    # the node matrix; divide by 2 since each prob covers one side of diagonal
    prob_pair_chosen = [matrix_pair_prob_dist[matrix_index] / 2
                        for matrix_index in sorted(matrix_pair_prob_dist)]

    return assign_probs_to_matrix(eps=eps, probs=prob_pair_chosen)
def convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(pair_prob_dist, eps):
    '''Re-keys a pair probability dict by node dist matrix pair index.

    Args:
        pair_prob_dist (dict): Dict whose keys are node pairs (JSON-encoded
            ``[src, dst]`` lists) and whose values are probabilities or
            fractions.
        eps (list): List of network node endpoints.

    Returns:
        matrix_pair_prob_dist (dict): Dict whose keys are matrix indices of the node
        pairs and whose values are the pairs' corresponding probabilities or fractions.
    '''
    _, pair_to_index = get_network_pair_mapper(eps)

    matrix_pair_prob_dist = {}
    for pair_key in pair_prob_dist:
        if pair_key in pair_to_index:
            mapped_key = pair_key
        else:
            # pair mapper stores the pair with src and dst switched
            loaded = json.loads(pair_key)
            mapped_key = json.dumps([loaded[1], loaded[0]])
        matrix_index = pair_to_index[mapped_key]

        if mapped_key in pair_prob_dist:
            prob = pair_prob_dist[mapped_key]
        else:
            # prob is stored under the un-switched pair, so switch back
            loaded = json.loads(mapped_key)
            prob = pair_prob_dist[json.dumps([loaded[1], loaded[0]])]
        matrix_pair_prob_dist[matrix_index] = prob

    return matrix_pair_prob_dist
def adjust_node_dist_from_multinomial_exp_for_rack_prob_config(rack_prob_config,
                                                               eps,
                                                               node_dist,
                                                               num_exps_factor=2,
                                                               print_data=False):
    '''Adjusts a node distribution for a rack config by sampling from it.

    Unlike the other adjust_node_dist_for_rack_prob_config function,
    this function adjusts the node dist by running multinomial experiments
    on the initial node distribution to sample from it. It therefore takes
    much much longer than the other function, especially for networks with >1,000 nodes.

    Takes node dist and uses it to generate new node dist given inter- and intra-rack configuration.
    Different DCNs have different inter and intra rack traffic. This function
    allows you to specify how much of your traffic should be inter and intra rack.

    Args:
        rack_prob_config (dict): Network endpoints/servers are often grouped into
            physically local clusters or `racks'. Different networks may have
            different levels of inter- (between) and intra- (within) rack communication.
            Dict should have a `racks_dict' key, whose value is a dict
            with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value
            for each key is a list of the endpoints in the respective rack
            (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack'
            key whose value is a float (e.g. 0.9), setting the probability
            that a chosen src endpoint has a destination which is outside of its rack.
            If you want to e.g. designate an entire rack as a 'hot rack' (many
            traffic requests occur from this rack), would specify skewed_nodes to
            contain the list of servers in this rack and configure rack_prob_config
            appropriately.
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        node_dist (numpy array): 2D matrix array of source-destination pair
            probabilities of being chosen. A deep copy is taken, so the
            caller's matrix is not modified.
        num_exps_factor (int): Factor by which to multiply number of ep pairs to get
            the number of multinomial experiments to run when generating new
            node dist.
        print_data (bool): Whether or not to print extra information about the
            generated data.

    Returns:
        numpy array: New 2D node distribution matrix built from the sampled
        pairs.
    '''
    node_dist = copy.deepcopy(node_dist)

    # switch racks_dict keys and values to make hashing easier
    racks_dict = {}
    for rack, rack_eps in rack_prob_config['racks_dict'].items():
        for ep in rack_eps:
            racks_dict.setdefault(ep, rack)

    # run multinomial exp to incorporate rack probabilities
    # (builtin int: the np.int alias has been removed from NumPy)
    num_pairs = int(((len(eps)**2) - len(eps))/2)
    num_experiments = num_exps_factor * num_pairs
    prob_inter_rack = rack_prob_config['prob_inter_rack']
    sampled_pairs = {}
    inter_counter = 0
    intra_counter = 0
    for _ in range(num_experiments):
        # sample if connection should be intra or inter rack
        inter_rack = np.random.choice(a=[True, False],
                                      p=[prob_inter_rack,
                                         1-prob_inter_rack])
        if inter_rack:
            inter_counter += 1
        else:
            intra_counter += 1

        # sample a source node
        sn = gen_demand_nodes(eps=eps,
                              node_dist=node_dist,
                              size=1,
                              axis=0,
                              check_sum_valid=False)[0]

        # sample destination node given inter_rack config
        dn = get_suitable_destination_node_for_rack_config(sn,
                                                           copy.deepcopy(node_dist),
                                                           eps,
                                                           ep_to_rack=racks_dict,
                                                           rack_to_ep=rack_prob_config['racks_dict'],
                                                           inter_rack=inter_rack)

        # count the pair, treating [sn, dn] and [dn, sn] as the same pair
        pair = json.dumps([sn, dn])
        if pair in sampled_pairs:
            # pair already seen before
            sampled_pairs[pair] += 1
        else:
            # check if switched src-dst pair that already occurred
            pair_switched = json.dumps([dn, sn])
            if pair_switched in sampled_pairs:
                # pair already seen before
                sampled_pairs[pair_switched] += 1
            else:
                sampled_pairs[pair] = 1  # init first occurrence of pair

    # convert sampled pairs dict to rand vars
    sampled_pairs = val_dists.convert_key_occurrences_to_data(list(sampled_pairs.keys()), list(sampled_pairs.values()))

    if print_data:
        total = inter_counter + intra_counter
        print('Number inter-rack requests: {} ({}%)'.format(inter_counter, inter_counter*100/total))
        print('Number intra-rack requests: {} ({}%)'.format(intra_counter, intra_counter*100/total))

    # convert sampled pairs list into prob dist
    unique_vals, pmf = val_dists.gen_discrete_prob_dist(sampled_pairs)
    sampled_pair_prob_dist = {unique_var: prob for unique_var, prob in zip(unique_vals, pmf)}

    # insert 0 probabilites for any pairs that were never sampled in multinomial exp
    num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
    index_to_pair, pair_to_index = get_network_pair_mapper(eps)
    if num_pairs != len(sampled_pair_prob_dist):
        # some pairs were never chosen, find which are missing
        chosen_pair_indices = set()
        for pair in sampled_pair_prob_dist:
            if pair not in pair_to_index:
                # pair was sampled with src and dst switched
                loaded = json.loads(pair)
                pair = json.dumps([loaded[1], loaded[0]])
            chosen_pair_indices.add(pair_to_index[pair])
        for index, missing_pair in index_to_pair.items():
            if index not in chosen_pair_indices:
                # index is missing, update sampled pairs dict with 0 prob for this index
                sampled_pair_prob_dist[json.dumps(missing_pair)] = 0

    # convert prob dist into dict whose keys are node dist matrix indices
    matrix_pair_prob_dist = convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(sampled_pair_prob_dist, eps)

    # sort so that get correct ordering of pair probs when assign to node matrix
    sorted_matrix_pair_prob_dist_keys = sorted(matrix_pair_prob_dist)

    # generate new node matrix prob dist (which now accounts for rack probs)
    # divide by 2 since each prob covers one side of diagonal
    prob_pair_chosen = [matrix_pair_prob_dist[key]/2
                        for key in sorted_matrix_pair_prob_dist_keys]
    if print_data:
        print('Prob pair chosen after accounting for rack prob config:\n{}'.format(prob_pair_chosen))
        print(len(prob_pair_chosen))
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=prob_pair_chosen,
                                       matrix=node_dist)

    return node_dist
def get_network_pair_mapper(eps):
    '''Gets dicts mapping network endpoint pairs to and from pair indices.

    Pairs are enumerated by looping ``for src in eps for dst in eps`` and
    keeping only those pairs whose src differs from dst and whose src index
    is not greater than the dst index (i.e. one side of the node dist matrix
    diagonal).

    Args:
        eps (list): List of network node endpoints.

    Returns:
        tuple: Tuple containing:

        - **index_to_pair** (*dict*): Maps each pair index to its
          ``[src, dst]`` list.
        - **pair_to_index** (*dict*): Maps each JSON-encoded ``[src, dst]``
          list to its pair index.
    '''
    _, _, node_to_index, _ = tools.get_network_params(eps)

    index_to_pair, pair_to_index = {}, {}
    next_index = 0
    for src in eps:
        for dst in eps:
            # making symmetric, so skip the diagonal and the other side of it
            if src == dst or node_to_index[src] > node_to_index[dst]:
                continue
            endpoints = [src, dst]
            index_to_pair[next_index] = endpoints
            pair_to_index[json.dumps(endpoints)] = next_index
            next_index += 1

    return index_to_pair, pair_to_index
[docs]def gen_multimodal_node_pair_dist(eps,
skewed_pairs = [],
skewed_pair_probs = [],
num_skewed_pairs=None,
rack_prob_config=None,
path_to_save=None,
plot_fig=False,
show_fig=False,
print_data=False):
'''Generates a multimodal node pair distribution.
Generates a multimodal node pair demand distribution i.e. certain node
pairs have a certain specified probability of being chosen. If no
skewed pairs given, randomly selects pair to skew. If no skew
pair probabilities given, random selects probability with which
to skew the pair between 0.1 and 0.3. If no num skewed pairs given,
randomly chooses number of pairs to skew.
Args:
eps (list): List of network node endpoints that can act as sources
& destinations.
skewed_pairs (list of lists): List of the node pairs [src,dst] to
skew.
skewed_pair_probs (list): Probabilities of node pairs being
chosen.
num_skewed_pairs (int): Number of pairs to randomly skew.
rack_prob_config (dict): Network endpoints/servers are often grouped into
physically local clusters or `racks'. Different networks may have
different levels of inter- (between) and intra- (within) rack communication.
If rack_prob_config is left as None, will assume that server-server
srs-dst requests are independent of which rack they might be in.
If specified, dict should have a `racks_dict' key, whose value is a dict
with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value
for each key is a list of the endpoints in the respective rack
(e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack'
key whose value is a float (e.g. 0.9), setting the probability
that a chosen src endpoint has a destination which is outside of its rack.
If you want to e.g. designate an entire rack as a 'hot rack' (many
traffic requests occur from this rack), would specify skewed_nodes to
contain the list of servers in this rack and configure rack_prob_config
appropriately.
path_to_save (str): Path to directory (with file name included) in which
to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
plot_fig (bool): Whether or not to plot fig. If True, will return fig.
show_fig (bool): Whether or not to plot and show fig. If True, will
return and display fig.
print_data (bool): Whether or not to print extra information about the
generated data.
Returns:
tuple: Tuple containing:
- **node_dist** (*numpy array*): 2D matrix array of souce-destination pair
probabilities of being chosen.
- **fig** (*matplotlib.figure.Figure, optional*): Node distributions
plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True.
'''
# initialise graph params
num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
index_to_pair, pair_to_index = get_network_pair_mapper(eps)
node_dist = np.zeros((num_nodes, num_nodes))
if num_skewed_pairs is None:
num_skewed_pairs = len(skewed_pairs)
if len(skewed_pairs) == 0:
if num_skewed_pairs == 0:
#randomly choose number of nodes to skew
min_skews = int(0.1*num_pairs)
max_skews = int(0.3*num_pairs)
if min_skews < 1:
min_skews = 1
max_skews = 2
num_skewed_pairs = np.random.randint(min_skews, max_skews+1)
# randomly choose src and dst for pairs
nodes = eps
src = np.random.choice(nodes, size=num_skewed_pairs)
dst = np.random.choice(nodes, size=num_skewed_pairs)
# remove src-dst conflicts and repeated pairs, then add to skewed pairs list
for idx in range(num_skewed_pairs):
sn, dn = src[idx], dst[idx]
pair = [sn, dn]
pair_flipped = [dn, sn]
while sn == dn or pair in skewed_pairs or pair_flipped in skewed_pairs:
dn = np.random.choice(nodes, size=1)[0]
pair = [sn, dn]
pair_flipped = [sn, dn]
dst[idx] = dn
skewed_pairs.append([sn, dn])
# keep src<dst convention consistent
for pair_iter in range(num_skewed_pairs):
pair = skewed_pairs[pair_iter]
if node_to_index[pair[0]] > node_to_index[pair[1]]:
# swap src and dst to keep convention consistent
temp_src = pair[1]
temp_dst = pair[0]
skewed_pairs[pair_iter] = [temp_src,temp_dst]
# update src and dst
for pair in range(len(src)):
src[pair] = skewed_pairs[pair][0]
dst[pair] = skewed_pairs[pair][1]
else:
# skewed pairs already given
assert (any(isinstance(pair,list) for pair in skewed_pairs)), \
'must enter skewed_pairs as list of lists'
src = [pair[0] for pair in skewed_pairs]
dst = [pair[1] for pair in skewed_pairs]
if print_data:
print('Chosen sources:\n{}'.format(src))
print('Chosen destinations:\n{}'.format(dst))
print('Num skewed pairs: {}'.format(num_skewed_pairs))
print('Chosen pairs to skew:\n{}'.format(skewed_pairs))
if len(skewed_pair_probs) == 0:
# randomly choose skew between range
min_prob = 0.3/num_skewed_pairs
max_prob = 0.5/num_skewed_pairs
skewed_pair_probs = np.random.uniform(min_prob,
max_prob,
size=num_skewed_pairs)
skewed_pair_probs = list(skewed_pair_probs)
if print_data:
print('Skew probs:\n{}'.format(skewed_pair_probs))
# find prob of each skewed node pair being chosen
pairs_per_node = num_nodes - 1
prob_pair_chosen = {pair: 0 for pair in pair_to_index.keys()}
_skewed_pairs = []
for p in skewed_pairs:
# ensure same order
if json.dumps(p) not in prob_pair_chosen:
_skewed_pairs.append(json.dumps(p[::-1]))
else:
_skewed_pairs.append(json.dumps(p))
skewed_pairs = _skewed_pairs
probs_per_skewed_pair = {pair: prob for pair, prob in zip(skewed_pairs, [p for p in skewed_pair_probs])}
# update prob pair chosen for each pair with a skewed node
unskewed_pairs = {pair: 0 for pair in pair_to_index.keys()} # maintain for efficient hashing
for skewed_pair in probs_per_skewed_pair.keys():
try:
prob_pair_chosen[skewed_pair] += probs_per_skewed_pair[skewed_pair]/2
try:
del unskewed_pairs[skewed_pair]
except KeyError:
# already registered as no longer being unskewed
pass
except KeyError:
skewed_pair = json.loads(skewed_pair)
skewed_pair = [skewed_pair[1],skewed_pair[0]]
skewed_pair = json.dumps(skewed_pair)
prob_pair_chosen[skewed_pair] += probs_per_skewed_pair[skewed_pair]/2
try:
del unskewed_pairs[skewed_pair]
except KeyError:
# already registered as no longer being unskewed
pass
total_skew_prob = np.sum(list(prob_pair_chosen.values()))
# assign prob pair chosen to any pairs w/o skewed nodes
if total_skew_prob < 0.5:
num_remaining_pairs = list(prob_pair_chosen.values()).count(0)
prob_dist = np.ones((num_remaining_pairs))/((num_remaining_pairs))
num_experiments = num_remaining_pairs * 100
counter_array = np.random.multinomial(num_experiments,
prob_dist,
size=1)[0]
counter_array_prob_dist = (counter_array/(num_experiments*2))
counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist
iterable = np.nditer(counter_array_prob_dist)
for pair in unskewed_pairs.keys():
prob_pair_chosen[pair] = next(iterable)
if print_data:
print('Prob pair chosen:\n{}'.format(prob_pair_chosen))
# assign probabilites to normalised demand matrix
node_dist = assign_probs_to_matrix(eps=eps,
probs=list(prob_pair_chosen.values()),
matrix=node_dist)
if rack_prob_config is not None:
# adjust node prob dist to account for rack prob config
node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
eps,
node_dist,
print_data=print_data)
matrix_sum = np.round(np.sum(node_dist),2)
assert matrix_sum == 1, \
'matrix must sum to 1, but is {}'.format(matrix_sum)
if print_data:
print('Normalised matrix:\n{}'.format(node_dist))
print('Normalised matrix sum: {}'.format(matrix_sum))
if path_to_save is not None:
tools.pickle_data(path_to_save, node_dist)
if plot_fig or show_fig:
fig = plot_dists.plot_node_dist(node_dist=node_dist,
eps=eps,
show_fig=show_fig)
return node_dist, fig
else:
return node_dist
# def gen_multimodal_node_pair_dist(eps,
# skewed_pairs = [],
# skewed_pair_probs = [],
# num_skewed_pairs=None,
# rack_prob_config=None,
# path_to_save=None,
# plot_fig=False,
# show_fig=False,
# print_data=False):
# '''Generates a multimodal node pair distribution.
# Generates a multimodal node pair demand distribution i.e. certain node
# pairs have a certain specified probability of being chosen. If no
# skewed pairs given, randomly selects pair to skew. If no skew
# pair probabilities given, randomly selects probability with which
# to skew the pair between 0.1 and 0.3. If no num skewed pairs given,
# randomly chooses number of pairs to skew.
# Args:
# eps (list): List of network node endpoints that can act as sources
# & destinations.
# skewed_pairs (list of lists): List of the node pairs [src,dst] to
# skew.
# skewed_pair_probs (list): Probabilities of node pairs being
# chosen.
# num_skewed_pairs (int): Number of pairs to randomly skew.
# rack_prob_config (dict): Network endpoints/servers are often grouped into
# physically local clusters or `racks'. Different networks may have
# different levels of inter- (between) and intra- (within) rack communication.
# If rack_prob_config is left as None, will assume that server-server
# src-dst requests are independent of which rack they might be in.
# If specified, dict should have a `racks_dict' key, whose value is a dict
# with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value
# for each key is a list of the endpoints in the respective rack
# (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack'
# key whose value is a float (e.g. 0.9), setting the probability
# that a chosen src endpoint has a destination which is outside of its rack.
# If you want to e.g. designate an entire rack as a 'hot rack' (many
# traffic requests occur from this rack), would specify skewed_nodes to
# contain the list of servers in this rack and configure rack_prob_config
# appropriately.
# path_to_save (str): Path to directory (with file name included) in which
# to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
# plot_fig (bool): Whether or not to plot fig. If True, will return fig.
# show_fig (bool): Whether or not to plot and show fig. If True, will
# return and display fig.
# print_data (bool): Whether or not to print extra information about the
# generated data.
# Returns:
# tuple: Tuple containing:
# - **node_dist** (*numpy array*): 2D matrix array of source-destination pair
# probabilities of being chosen.
# - **fig** (*matplotlib.figure.Figure, optional*): Node distributions
# plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True.
# '''
# # initialise graph params
# num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
# node_dist = np.zeros((num_nodes, num_nodes))
# if num_skewed_pairs is None:
# num_skewed_pairs = len(skewed_pairs)
# if len(skewed_pairs) == 0:
# if num_skewed_pairs == 0:
# #randomly choose number of nodes to skew
# min_skews = int(0.1*num_pairs)
# max_skews = int(0.3*num_pairs)
# if min_skews < 1:
# min_skews = 1
# max_skews = 2
# num_skewed_pairs = np.random.randint(min_skews, max_skews+1)
# # randomly choose src and dst for pairs
# nodes = eps
# src = np.random.choice(nodes, size=num_skewed_pairs)
# dst = np.random.choice(nodes, size=num_skewed_pairs)
# # remove src-dst conflicts and repeated pairs
# for pair_iter in range(num_skewed_pairs):
# try:
# pair = skewed_pairs[pair_iter]
# except IndexError:
# pair = []
# while (skewed_pairs.count(pair) > 1 or src[pair_iter] == dst[pair_iter]):
# # remove repeated pairs and src-dst conflicts
# dst[pair_iter] = np.random.choice(nodes, size=1)[0]
# skewed_pairs.append([src[pair_iter],dst[pair_iter]])
# # keep src<dst convention consistent
# for pair_iter in range(num_skewed_pairs):
# pair = skewed_pairs[pair_iter]
# if pair[0] > pair[1]:
# # swap src and dst to keep convention consistent
# temp_src = pair[1]
# temp_dst = pair[0]
# skewed_pairs[pair_iter] = [temp_src,temp_dst]
# # update src and dst
# for pair in range(len(src)):
# src[pair] = skewed_pairs[pair][0]
# dst[pair] = skewed_pairs[pair][1]
# else:
# assert (any(isinstance(pair,list) for pair in skewed_pairs)), \
# 'must enter skewed_pairs as list of lists'
# src = [pair[0] for pair in skewed_pairs]
# dst = [pair[1] for pair in skewed_pairs]
# if print_data:
# print('Chosen sources:\n{}'.format(src))
# print('Chosen destinations:\n{}'.format(dst))
# print('Num skewed pairs: {}'.format(num_skewed_pairs))
# print('Chosen pairs to skew:\n{}'.format(skewed_pairs))
# if len(skewed_pair_probs) == 0:
# # randomly choose skew between range
# min_prob = 0.3/num_skewed_pairs
# max_prob = 0.5/num_skewed_pairs
# skewed_pair_probs = np.random.uniform(min_prob,
# max_prob,
# size=num_skewed_pairs)
# skewed_pair_probs = list(skewed_pair_probs)
# if print_data:
# print('Skew probs:\n{}'.format(skewed_pair_probs))
# # get indices of node pairs to skew
# skewed_pair_indices = np.zeros(len(skewed_pairs))
# skewed_pair_iter = 0
# for pair in range(len(skewed_pairs)):
# matrix_iter = 0
# for src in eps:
# for dst in eps:
# if src == dst:
# continue
# elif src > dst:
# continue
# else:
# if skewed_pairs[pair][0] == src and skewed_pairs[pair][1] == dst:
# skewed_pair_indices[skewed_pair_iter] = matrix_iter
# skewed_pair_iter += 1
# elif skewed_pairs[pair][0] == dst and skewed_pairs[pair][1] == src:
# skewed_pair_indices[skewed_pair_iter] = matrix_iter
# skewed_pair_iter += 1
# matrix_iter += 1
# if print_data:
# print('Skew indices:\n{}'.format(skewed_pair_indices))
# # update prob pair chosen for each skewed pair
# prob_pair_chosen = np.zeros(num_pairs)
# iter = np.nditer(np.asarray(skewed_pair_probs))
# for skewed_pair_iter in range(num_skewed_pairs):
# for pair_iter in range(num_pairs):
# if pair_iter == skewed_pair_indices[skewed_pair_iter]:
# # add to skew of pair
# prob = next(iter)
# prob_pair_chosen[pair_iter] += prob
# else:
# continue
# # will allocate twice
# prob_pair_chosen = prob_pair_chosen / 2
# total_skew_prob = np.sum(prob_pair_chosen)
# # assign prob pair chosen to any pairs w/o skew
# if total_skew_prob < 0.5:
# remaining_pairs = np.count_nonzero(prob_pair_chosen == 0)
# prob_dist = np.ones((remaining_pairs))/((remaining_pairs))
# counter_array = np.random.multinomial(500,
# prob_dist,
# size=1)[0]
# counter_array_prob_dist = (counter_array/1000)
# counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist
# iter = np.nditer(counter_array_prob_dist)
# for pair in range(len(prob_pair_chosen)):
# if prob_pair_chosen[pair] == 0:
# prob_pair_chosen[pair] = next(iter)
# if print_data:
# print('Prob pair chosen:\n{}'.format(prob_pair_chosen))
# # assign probabilites to normalised demand matrix
# node_dist = assign_probs_to_matrix(eps=eps,
# probs=prob_pair_chosen,
# matrix=node_dist)
# if rack_prob_config is not None:
# # adjust node prob dist to account for rack prob config
# node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
# eps,
# node_dist,
# print_data=print_data)
# matrix_sum = np.round(np.sum(node_dist),2)
# assert matrix_sum == 1, \
# 'matrix must sum to 1, but is {}'.format(matrix_sum)
# if print_data:
# print('Normalised matrix:\n{}'.format(node_dist))
# print('Normalised matrix sum: {}'.format(matrix_sum))
# if path_to_save is not None:
# tools.pickle_data(path, node_dist)
# if plot_fig or show_fig:
# fig = plot_dists.plot_node_dist(node_dist=node_dist,
# eps=eps,
# show_fig=show_fig)
# return node_dist, fig
# else:
# return node_dist
def gen_node_demands(eps,
                     node_dist,
                     num_demands,
                     rack_prob_config=None,
                     duplicate=False,
                     path_to_save=None):
    '''Samples src-dst node pair demands from a node distribution.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        node_dist (numpy array): 2D matrix array of source-destination pair
            probabilities of being chosen. Must sum to 1 (after rounding to
            2 decimal places).
        num_demands (int): Number of src-dst node pairs to generate.
        rack_prob_config (dict): Optional rack configuration. If given, must
            contain a 'racks_dict' key (rack label -> list of endpoints in
            that rack) and a 'prob_inter_rack' key (probability that a
            demand is inter-rack). Each destination is then re-drawn with
            get_suitable_destination_node_for_rack_config.
        duplicate (bool): Whether or not to duplicate src-dst node pairs. Use
            this if the demands you're generating have a 'take down' event as
            well as an 'establish' event. When True, the returned arrays have
            length 2*num_demands and the second half repeats the first.
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.

    Returns:
        tuple: Tuple containing:
            - **sn** (*numpy array*): Selected source nodes.
            - **dn** (*numpy array*): Selected destination nodes.
    '''
    dist_total = np.round(np.sum(node_dist), 2)
    assert dist_total == 1, \
        'demand dist matrix must sum to 1, but is {}'.format(dist_total)

    # only the first num_demands slots are sampled; any extra slots are a mirror
    array_len = 2 * num_demands if duplicate else num_demands
    sn = np.array(np.zeros(array_len), dtype=object)
    dn = np.array(np.zeros(array_len), dtype=object)

    # draw sources (axis=0) first, then destinations (axis=1)
    sn[:num_demands] = gen_demand_nodes(eps=eps,
                                        node_dist=node_dist,
                                        size=num_demands,
                                        axis=0)
    dn[:num_demands] = gen_demand_nodes(eps=eps,
                                        node_dist=node_dist,
                                        size=num_demands,
                                        axis=1)

    # re-draw the destination of any demand whose src and dst coincide
    for idx in range(num_demands):
        while sn[idx] == dn[idx]:
            dn[idx] = gen_demand_nodes(eps=eps,
                                       node_dist=node_dist,
                                       size=1,
                                       axis=1,
                                       check_sum_valid=False)[0]

    if rack_prob_config is not None:
        # invert rack -> endpoints into endpoint -> rack for direct lookup;
        # an endpoint listed in several racks keeps the first rack seen
        rack_to_ep = rack_prob_config['racks_dict']
        ep_to_rack = {}
        for rack, rack_eps in rack_to_ep.items():
            for ep in rack_eps:
                ep_to_rack.setdefault(ep, rack)

        for idx in range(num_demands):
            # decide whether this demand leaves the source's rack
            prob_inter = rack_prob_config['prob_inter_rack']
            inter_rack = np.random.choice(a=[True, False],
                                          p=[prob_inter, 1-prob_inter])
            # pick a destination consistent with that decision
            dn[idx] = get_suitable_destination_node_for_rack_config(sn[idx],
                                                                    node_dist,
                                                                    eps,
                                                                    ep_to_rack=ep_to_rack,
                                                                    rack_to_ep=rack_prob_config['racks_dict'],
                                                                    inter_rack=inter_rack)

    if duplicate:
        # mirror the finalised first half into the second half
        sn[num_demands:] = sn[:num_demands]
        dn[num_demands:] = dn[:num_demands]

    if path_to_save is not None:
        tools.pickle_data(path_to_save, {'sn': sn, 'dn': dn})

    return sn, dn
def gen_demand_nodes(eps,
                     node_dist,
                     size,
                     axis,
                     path_to_save=None,
                     check_sum_valid=True):
    '''Samples endpoint labels according to a marginal of node_dist.

    Args:
        eps (list): List of node endpoint labels.
        node_dist (numpy array): Probability distribution each node is chosen.
        size (int): Number of demand nodes to generate.
        axis (int, 1 or 0): Axis of node_dist that is summed over to obtain
            the per-node probabilities. E.g. if generating src nodes, axis=0.
            If dst nodes, axis=1.
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
        check_sum_valid (bool): Whether or not to ensure node dist sums to 1.
            If need efficiency, should set to False.

    Returns:
        numpy array: Object-dtype array holding the sampled endpoint labels.
    '''
    if check_sum_valid:
        dist_total = np.round(np.sum(node_dist), 2)
        assert dist_total == 1, \
            'matrix must sum to 1, but is {}'.format(dist_total)

    num_eps = len(eps)
    marginal = np.sum(node_dist, axis=axis)
    if num_eps != len(marginal):
        # eps does not line up with the matrix: keep only the rows selected by
        # the indices tools.get_network_params assigns to eps, then re-sum
        _, _, node_to_index, _ = tools.get_network_params(eps)
        node_dist = node_dist[[node_to_index[ep] for ep in eps]]
        marginal = np.sum(node_dist, axis=axis)

    # nudge the marginal so it sums to 1 (floating point error, or the row
    # selection above, can leave it slightly off)
    probs = adjust_probability_array_sum(marginal, target_sum=1, print_data=False)

    nodes = np.random.choice(a=eps, size=size, p=probs).astype(object)

    if path_to_save is not None:
        tools.pickle_data(path_to_save, nodes)

    return nodes
def adjust_probability_array_sum(probs, target_sum=1, print_data=False):
    '''Shifts every entry of a sequence by the same amount so it sums to target_sum.

    The input is deep-copied and never modified. The gap between the current
    sum and target_sum is divided by len(probs) and subtracted from each entry.
    This is meant for small floating point corrections.

    Args:
        probs (list or numpy array): Values to adjust.
        target_sum (float): Sum the returned values should have.
        print_data (bool): Whether or not to print the sum before and after
            the adjustment.

    Returns:
        list or numpy array: Adjusted copy of probs. Integer numpy arrays are
        returned as float arrays when a correction is applied, since the
        correction is generally fractional.
    '''
    adjusted_probs = copy.deepcopy(probs)

    total = np.sum(adjusted_probs)
    if print_data:
        print('Initial sum: {}'.format(total))

    difference = total - target_sum
    num_entries = len(adjusted_probs)
    # skip when already on target, or when there is nothing to spread the
    # correction over (avoids a division by zero for empty input)
    if difference != 0 and num_entries > 0:
        if isinstance(adjusted_probs, np.ndarray) and np.issubdtype(adjusted_probs.dtype, np.integer):
            # assigning a float into an integer array truncates it, which would
            # silently discard the fractional correction
            adjusted_probs = adjusted_probs.astype(float)
        diff_per_pair = difference / num_entries
        for idx in range(num_entries):
            adjusted_probs[idx] -= diff_per_pair

    if print_data:
        print('Final sum: {}'.format(np.sum(adjusted_probs)))

    return adjusted_probs
def adjust_probability_dict_sum(probs, target_sum=1, print_data=False):
    '''Shifts every value of a dict by the same amount so the values sum to target_sum.

    The input dict is deep-copied and never modified. The gap between the
    current sum of values and target_sum is divided by the number of keys and
    subtracted from each value.

    Args:
        probs (dict): Maps keys to numeric values.
        target_sum (float): Sum the returned values should have.
        print_data (bool): Whether or not to print the sum before and after
            the adjustment.

    Returns:
        dict: Adjusted copy of probs.
    '''
    result = copy.deepcopy(probs)

    initial_sum = np.sum(list(result.values()))
    if print_data:
        print('Initial sum: {}'.format(initial_sum))

    surplus = initial_sum - target_sum
    if surplus != 0:
        # spread the surplus (or deficit) evenly over all keys
        share = surplus / len(result)
        for key in result:
            result[key] -= share

    if print_data:
        print('Final sum: {}'.format(np.sum(list(result.values()))))

    return result
def get_pair_prob_dict_of_node_dist_matrix(node_dist, eps, all_combinations=False, bidirectional=False):
    '''Builds a dict of pair -> probability from a node distribution matrix.

    Keys are JSON-encoded [src, dst] lists. The dict starts with every pair
    returned by get_network_pair_mapper(eps) set to 0, then is filled from
    node_dist.

    If all_combinations, records an entry for every ordered pair, i.e. both
    src-dst and dst-src. If False, assumes src-dst==dst-src and only visits
    pairs whose src index is below the dst index.

    If bidirectional, adds node_dist[dst, src] to node_dist[src, dst] for each
    pair, as the pair can be src-dst or dst-src. Per the original note, this
    makes the values sum to 1 rather than 0.5 (this presumes a symmetric,
    zero-diagonal node_dist that sums to 1 and all_combinations=False).

    Args:
        node_dist (numpy array): 2D matrix of src-dst pair probabilities.
        eps (list): List of node endpoint labels.
        all_combinations (bool): Whether to record both orderings of each pair.
        bidirectional (bool): Whether to add the reverse-direction probability.

    Returns:
        dict: Maps JSON-encoded pair to its probability.
    '''
    _, pair_to_index = get_network_pair_mapper(eps)
    _, _, node_to_index, _ = tools.get_network_params(eps, all_combinations=all_combinations)
    pair_prob_dict = dict.fromkeys(list(pair_to_index.keys()), 0)

    def _record(key, row, col):
        # write the probability for this pair, plus the reverse direction if requested
        pair_prob_dict[key] = node_dist[row, col]
        if bidirectional:
            pair_prob_dict[key] += node_dist[col, row]

    for src in eps:
        src_idx = node_to_index[src]
        for dst in eps:
            dst_idx = node_to_index[dst]
            if src_idx == dst_idx:
                continue
            if src_idx > dst_idx and not all_combinations:
                # treating pairs as symmetric, so skip this side of the diagonal
                continue
            key = json.dumps([src, dst])
            try:
                _record(key, src_idx, dst_idx)
            except KeyError:
                # NOTE(review): plain dict assignment never raises KeyError, so
                # this fallback seems reachable only if indexing node_dist
                # raises KeyError - confirm whether that is intended
                flipped_key = json.dumps(json.loads(key)[::-1])
                _record(flipped_key, src_idx, dst_idx)

    return pair_prob_dict