Source code for trafpy.generator.src.dists.node_dists

'''Module for generating node distributions.'''

from trafpy.generator.src.dists import plot_dists
from trafpy.generator.src.dists import val_dists
from trafpy.generator.src import tools

import sys
import numpy as np
import time
import json
import copy




def assign_probs_to_matrix(eps, probs, matrix=None):
    '''Assigns pair probabilities to a symmetric 2D node matrix.

    Args:
        eps (list): Network endpoints. Their indices (as returned by
            tools.get_network_params) address the rows/columns of the matrix.
        probs (dict or sequence): Either a dict whose keys are JSON-encoded
            [src, dst] endpoint pairs and whose values are probabilities, or
            a flat sequence of probabilities. N.B. if a sequence is given,
            the values are consumed in the order pairs are met when looping

                for src in eps
                    for dst in eps

            while skipping src == dst and any pair whose src index is
            greater than its dst index.
        matrix (numpy array, optional): Matrix to write into (modified in
            place). If None, a len(eps) x len(eps) matrix of zeros is created.

    Returns:
        numpy array: The matrix with every probability written to both
        [src_idx, dst_idx] and [dst_idx, src_idx].
    '''
    if matrix is None:
        matrix = np.zeros((len(eps), len(eps)))

    _, _, node_to_index, _ = tools.get_network_params(eps)

    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict/defaultdict take the dict branch too
    if isinstance(probs, dict):
        for pair, prob in probs.items():
            pair = json.loads(pair)
            src, dst = pair[0], pair[1]
            src_idx, dst_idx = node_to_index[src], node_to_index[dst]
            # mirror across the diagonal so the matrix stays symmetric
            matrix[src_idx, dst_idx] = prob
            matrix[dst_idx, src_idx] = prob
    else:
        prob_iter = np.nditer(np.array(probs))
        for src in eps:
            src_idx = node_to_index[src]
            for dst in eps:
                dst_idx = node_to_index[dst]
                if src == dst or src_idx > dst_idx:
                    # diagonal, or the side of the diagonal that is filled
                    # in by mirroring
                    continue
                prob = next(prob_iter)
                matrix[src_idx, dst_idx] = prob
                matrix[dst_idx, src_idx] = prob

    return matrix
def assign_matrix_to_probs(eps, node_dist):
    '''Flattens a 2D node distribution matrix into a pair-probability dict.

    Every ordered combination of two distinct endpoints in eps produces one
    entry. N.B. the keys are JSON-encoded [src_idx, dst_idx] lists built from
    the endpoints' matrix indices (not the endpoint labels themselves), and
    the values are the matrix entries found at those indices.
    '''
    _, _, node_to_index, _ = tools.get_network_params(eps)
    dist_matrix = np.asarray(node_dist)

    pair_prob_dict = {}
    for src in eps:
        row = node_to_index[src]
        for dst in eps:
            if src != dst:
                col = node_to_index[dst]
                pair_prob_dict[json.dumps([row, col])] = dist_matrix[row, col]

    return pair_prob_dict
def gen_uniform_node_dist(eps,
                          rack_prob_config=None,
                          path_to_save=None,
                          plot_fig=False,
                          show_fig=False,
                          print_data=False):
    '''Generates a uniform node distribution.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        rack_prob_config (dict): Optional inter-/intra-rack configuration.
            If None, the uniform distribution is left untouched. If given,
            it is handed to adjust_node_dist_for_rack_prob_config, which
            reads a 'racks_dict' key (rack label -> list of the endpoints in
            that rack, e.g. {'rack_0': ['server_0', 'server_24']}) and a
            'prob_inter_rack' key (float, e.g. 0.9).
        path_to_save (str): If not None, the generated distribution is
            passed to tools.pickle_data together with this path
            (e.g. path_to_save='data/dists/my_dist').
        plot_fig (bool): Whether or not to plot fig. If True, will return fig.
        show_fig (bool): Whether or not to plot and show fig. If True, will
            return and display fig.
        print_data (bool): Whether or not to print extra information about
            the generated data.

    Returns:
        numpy array or tuple: The 2D matrix of source-destination pair
        probabilities (**node_dist**). If plot_fig and/or show_fig is True,
        a tuple (node_dist, fig) is returned instead, where fig is whatever
        plot_dists.plot_node_dist returns.

    Raises:
        AssertionError: If the matrix does not sum to 1 (after rounding to 2
            decimal places).
    '''
    # init network params
    num_nodes, num_pairs, _, _ = tools.get_network_params(eps)

    # same probability for every pair; halved because assign_probs_to_matrix
    # writes each value to both sides of the diagonal
    uniform_pair_probs = np.ones(num_pairs) / num_pairs / 2
    if print_data:
        print('Prob pair chosen:\n{}'.format(uniform_pair_probs))

    # assign probabilities to matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=uniform_pair_probs,
                                       matrix=np.zeros((num_nodes, num_nodes)))

    if rack_prob_config is not None:
        # adjust node prob dist to account for rack prob config
        node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                                          eps,
                                                          node_dist,
                                                          print_data=print_data)

    matrix_sum = np.round(np.sum(node_dist), 2)
    assert matrix_sum == 1, \
        'matrix must sum to 1, but is {}'.format(matrix_sum)

    if print_data:
        print('Normalised matrix:\n{}'.format(node_dist))
        print('Normalised matrix sum: {}'.format(matrix_sum))

    if path_to_save is not None:
        tools.pickle_data(path_to_save, node_dist)

    if not (plot_fig or show_fig):
        return node_dist

    fig = plot_dists.plot_node_dist(node_dist=node_dist,
                                    eps=eps,
                                    show_fig=show_fig)
    return node_dist, fig
def gen_uniform_multinomial_exp_node_dist(eps,
                                          rack_prob_config=None,
                                          path_to_save=None,
                                          plot_fig=False,
                                          show_fig=False,
                                          print_data=False):
    '''Runs multinomial exp with uniform initial probability to generate slight skew.

    A single multinomial experiment of 500 draws is run in which every node
    pair has the same (uniform) probability of being chosen. The resulting
    per-pair counts are turned into probabilities, giving a node demand
    distribution in which a few pairs & nodes have a slight skew in demand.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        rack_prob_config (dict): Optional inter-/intra-rack configuration.
            If None, the sampled distribution is left untouched. If given,
            it is handed to adjust_node_dist_for_rack_prob_config, which
            reads a 'racks_dict' key (rack label -> list of the endpoints in
            that rack, e.g. {'rack_0': ['server_0', 'server_24']}) and a
            'prob_inter_rack' key (float, e.g. 0.9).
        path_to_save (str): If not None, the generated distribution is
            passed to tools.pickle_data together with this path
            (e.g. path_to_save='data/dists/my_dist').
        plot_fig (bool): Whether or not to plot fig. If True, will return fig.
        show_fig (bool): Whether or not to plot and show fig. If True, will
            return and display fig.
        print_data (bool): Whether or not to print extra information about
            the generated data.

    Returns:
        numpy array or tuple: The 2D matrix of source-destination pair
        probabilities (**node_dist**). If plot_fig and/or show_fig is True,
        a tuple (node_dist, fig) is returned instead, where fig is whatever
        plot_dists.plot_node_dist returns.

    Raises:
        AssertionError: If the matrix does not sum to 1 (after rounding to 2
            decimal places).
    '''
    # initialise graph params
    num_nodes, num_pairs, _, _ = tools.get_network_params(eps)
    uniform_pair_probs = np.ones(num_pairs) / num_pairs

    # run multinomial exp to get no. times each pair chosen
    pair_counts = np.random.multinomial(500, uniform_pair_probs, size=1)[0]

    # 500 draws divided by 1000: the counts sum to 0.5, which becomes 1 once
    # assign_probs_to_matrix mirrors every value across the diagonal
    pair_probs = pair_counts / 1000

    # assign probabilities to matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=pair_probs,
                                       matrix=np.zeros((num_nodes, num_nodes)))

    if rack_prob_config is not None:
        # adjust node prob dist to account for rack prob config
        node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                                          eps,
                                                          node_dist,
                                                          print_data=print_data)

    matrix_sum = np.round(np.sum(node_dist), 2)
    assert matrix_sum == 1, \
        'matrix must sum to 1, but is {}'.format(matrix_sum)

    if print_data:
        print('Normalised matrix:\n{}'.format(node_dist))
        print('Normalised matrix sum: {}'.format(matrix_sum))

    if path_to_save is not None:
        tools.pickle_data(path_to_save, node_dist)

    if not (plot_fig or show_fig):
        return node_dist

    fig = plot_dists.plot_node_dist(node_dist=node_dist,
                                    eps=eps,
                                    show_fig=show_fig)
    return node_dist, fig
def gen_multimodal_node_dist(eps,
                             skewed_nodes=[],
                             skewed_node_probs=[],
                             num_skewed_nodes=None,
                             rack_prob_config=None,
                             path_to_save=None,
                             plot_fig=False,
                             show_fig=False,
                             print_data=False):
    '''Generates a multimodal node distribution.

    Generates a multimodal node demand distribution i.e. certain nodes
    have a certain specified probability of being chosen. If no skewed nodes
    are given, nodes to skew are drawn at random from eps. If no skew
    probabilities are given, each skewed node gets a probability drawn
    uniformly between 0.5/num_skewed_nodes and 0.8/num_skewed_nodes. Each
    skewed node's probability is spread evenly over the pairs that contain
    it. If the skewed pairs do not account for all of the probability, the
    remainder is shared between the pairs without a skewed node using a
    multinomial experiment.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        skewed_nodes (list): Node(s) whose probability of being chosen you
            want to skew/specify. The default list is only read, never
            modified.
        skewed_node_probs (list): Probabilit(y)(ies) of node(s) being
            chosen/specified skews. The default list is only read, never
            modified.
        num_skewed_nodes (int): Number of nodes to skew. If None, uses
            len(skewed_nodes); if that is 0 as well, a number between
            int(0.1*num_nodes) and int(0.3*num_nodes) (inclusive) is drawn,
            or between 1 and 2 when int(0.1*num_nodes) is below 1.
        rack_prob_config (dict): Optional inter-/intra-rack configuration.
            If None, the distribution is left untouched. If given, it is
            handed to adjust_node_dist_for_rack_prob_config, which reads a
            'racks_dict' key (rack label -> list of the endpoints in that
            rack, e.g. {'rack_0': ['server_0', 'server_24']}) and a
            'prob_inter_rack' key (float, e.g. 0.9).
        path_to_save (str): If not None, the generated distribution is
            passed to tools.pickle_data together with this path
            (e.g. path_to_save='data/dists/my_dist').
        plot_fig (bool): Whether or not to plot fig. If True, will return fig.
        show_fig (bool): Whether or not to plot and show fig. If True, will
            return and display fig.
        print_data (bool): Whether or not to print extra information about
            the generated data.

    Returns:
        numpy array or tuple: The 2D matrix of source-destination pair
        probabilities (**node_dist**). If plot_fig and/or show_fig is True,
        a tuple (node_dist, fig) is returned instead, where fig is whatever
        plot_dists.plot_node_dist returns.

    Raises:
        AssertionError: If the matrix does not sum to 1 (after rounding to 2
            decimal places).
    '''
    # initialise graph params
    num_nodes, _, _, index_to_node = tools.get_network_params(eps)
    _, pair_to_index = get_network_pair_mapper(eps)
    node_dist = np.zeros((num_nodes, num_nodes))

    if num_skewed_nodes is None:
        num_skewed_nodes = len(skewed_nodes)

    if len(skewed_nodes) == 0:
        if num_skewed_nodes == 0:
            # randomly choose number of nodes to skew
            min_skews = int(0.1 * num_nodes)
            max_skews = int(0.3 * num_nodes)
            if min_skews < 1:
                min_skews, max_skews = 1, 2
            num_skewed_nodes = np.random.randint(min_skews, max_skews + 1)
        # randomly choose the node(s). tolist() converts the numpy scalars
        # back to native Python values so that json.dumps below also works
        # for non-string (e.g. integer) endpoints.
        # NOTE(review): np.random.choice samples with replacement by default,
        # so the same node may be drawn more than once -- confirm intended.
        skewed_nodes = np.random.choice(eps, size=num_skewed_nodes).tolist()

    if len(skewed_node_probs) == 0:
        # randomly choose skew between range
        min_prob = 0.5 / num_skewed_nodes
        max_prob = 0.8 / num_skewed_nodes
        skewed_node_probs = list(np.random.uniform(min_prob,
                                                   max_prob,
                                                   size=num_skewed_nodes))

    if print_data:
        skewed_node_probs_dict = dict(zip(skewed_nodes, skewed_node_probs))
        print('Num skewed nodes: {}'.format(num_skewed_nodes))
        print('Chosen for skew:\n{}'.format(skewed_node_probs_dict.keys()))
        print('Chosen probs:\n{}'.format(skewed_node_probs_dict.values()))

    # find prob of each skewed node pair being chosen
    pairs_per_node = num_nodes - 1
    probs_per_skewed_pair = {node: prob / pairs_per_node
                             for node, prob in zip(skewed_nodes, skewed_node_probs)}

    # update prob pair chosen for each pair with a skewed node
    prob_pair_chosen = {pair: 0 for pair in pair_to_index}
    unskewed_pairs = {pair: 0 for pair in pair_to_index}  # maintain for efficient hashing
    for node, pair_prob in probs_per_skewed_pair.items():
        for idx in range(num_nodes):
            other = index_to_node[idx]
            if other == node:
                continue
            # pairs are keyed in one orientation only; fall back to the
            # switched src-dst orientation when [node, other] is not a key
            pair = json.dumps([node, other])
            if pair not in prob_pair_chosen:
                pair = json.dumps([other, node])
            prob_pair_chosen[pair] += pair_prob / 2  # allocate 2x so divide by 2
            unskewed_pairs.pop(pair, None)  # may already have been removed

    total_skew_prob = np.sum(list(prob_pair_chosen.values()))

    # assign prob pair chosen to any pairs w/o skewed nodes
    if total_skew_prob < 0.5:
        num_remaining_pairs = list(prob_pair_chosen.values()).count(0)
        prob_dist = np.ones(num_remaining_pairs) / num_remaining_pairs
        num_experiments = num_remaining_pairs * 100
        counter_array = np.random.multinomial(num_experiments,
                                              prob_dist,
                                              size=1)[0]
        counter_array_prob_dist = counter_array / (num_experiments * 2)
        # scale so the remaining pairs share whatever the skewed pairs left
        counter_array_prob_dist = ((0.5 - total_skew_prob) / 0.5) * counter_array_prob_dist
        remaining_probs = np.nditer(counter_array_prob_dist)
        for pair in unskewed_pairs:
            prob_pair_chosen[pair] = next(remaining_probs)

    if print_data:
        print('Prob pair chosen:\n{}'.format(prob_pair_chosen))

    # assign probabilities to normalised demand matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=list(prob_pair_chosen.values()),
                                       matrix=node_dist)

    if rack_prob_config is not None:
        # adjust node prob dist to account for rack prob config
        node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                                          eps,
                                                          node_dist,
                                                          print_data=print_data)

    matrix_sum = np.round(np.sum(node_dist), 2)
    assert matrix_sum == 1, \
        'matrix must sum to 1, but is {}'.format(matrix_sum)

    if print_data:
        print('Normalised matrix:\n{}'.format(node_dist))
        print('Normalised matrix sum: {}'.format(matrix_sum))

    if path_to_save is not None:
        tools.pickle_data(path_to_save, node_dist)

    if plot_fig or show_fig:
        fig = plot_dists.plot_node_dist(node_dist=node_dist,
                                        eps=eps,
                                        show_fig=show_fig)
        return node_dist, fig

    return node_dist
# def gen_multimodal_node_dist(eps, # skewed_nodes=[], # skewed_node_probs=[], # num_skewed_nodes=None, # rack_prob_config=None, # path_to_save=None, # plot_fig=False, # show_fig=False, # print_data=False): # '''Generates a multimodal node distribution. # Generates a multimodal node demand distribution i.e. certain nodes # have a certain specified probability of being chosen. If no # skewed nodes given, randomly selects random no. node(s) to skew. If no # skew node probabilities given, random selects probability with which # to skew the node between 0.5 and 0.8. If no num skewed nodes given, # randomly chooses number of nodes to skew. # Args: # eps (list): List of network node endpoints that can act as sources # & destinations # skewed_nodes (list): Node(s) to whose probability of being # chosen you want to skew/specify # skewed_node_probs (list): Probabilit(y)(ies) of node(s) # being chosen/specified skews # num_skewed_nodes (int): Number of nodes to skew. If None, will gen # a number between 10% and 30% of the total number of nodes in network # rack_prob_config (dict): Network endpoints/servers are often grouped into # physically local clusters or `racks'. Different networks may have # different levels of inter- (between) and intra- (within) rack communication. # If rack_prob_config is left as None, will assume that server-server # srs-dst requests are independent of which rack they might be in. # If specified, dict should have a `racks_dict' key, whose value is a dict # with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value # for each key is a list of the endpoints in the respective rack # (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack' # key whose value is a float (e.g. 0.9), setting the probability # that a chosen src endpoint has a destination which is outside of its rack. # If you want to e.g. 
designate an entire rack as a 'hot rack' (many # traffic requests occur from this rack), would specify skewed_nodes to # contain the list of servers in this rack and configure rack_prob_config # appropriately. # path_to_save (str): Path to directory (with file name included) in which # to save generated distribution. E.g. path_to_save='data/dists/my_dist'. # plot_fig (bool): Whether or not to plot fig. If True, will return fig. # show_fig (bool): Whether or not to plot and show fig. If True, will # return and display fig. # print_data (bool): Whether or not to print extra information about the # generated data. # Returns: # tuple: Tuple containing: # - **node_dist** (*numpy array*): 2D matrix array of souce-destination pair # probabilities of being chosen. # - **fig** (*matplotlib.figure.Figure, optional*): Node distributions # plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True. # ''' # # initialise graph params # num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps) # node_dist = np.zeros((num_nodes, num_nodes)) # if num_skewed_nodes is None: # num_skewed_nodes = len(skewed_nodes) # if len(skewed_nodes) == 0: # if num_skewed_nodes == 0: # # randomly choose number of nodes to skew # min_skews = int(0.1*num_nodes) # max_skews = int(0.3*num_nodes) # if min_skews < 1: # min_skews = 1 # max_skews = 2 # num_skewed_nodes = np.random.randint(min_skews, max_skews+1) # # randomly choose a node # skewed_nodes = list(np.random.choice(eps, size=num_skewed_nodes)) # if len(skewed_node_probs) == 0: # # randomly choose skew between range # min_prob = 0.5/num_skewed_nodes # max_prob = 0.8/num_skewed_nodes # skewed_node_probs = np.random.uniform(min_prob, # max_prob, # size=num_skewed_nodes) # skewed_node_probs = list(np.round(skewed_node_probs,2)) # if print_data: # print('Num skewed nodes: {}'.format(num_skewed_nodes)) # print('Chosen for skew:\n{}'.format(skewed_nodes)) # print('Chosen probs:\n{}'.format(skewed_node_probs)) # # 
get indices of node pairs to skew # pairs_per_node = num_nodes - 1 # skewed_node_pair_indices = np.zeros((pairs_per_node, num_skewed_nodes)) # for skewed_node_iter in range(num_skewed_nodes): # skewed_node_pair_iter = 0 # matrix_iter = 0 # for src in eps: # for dst in eps: # if src == dst: # continue # elif src > dst: # continue # else: # skewed_node = skewed_nodes[skewed_node_iter] # if skewed_node == src or skewed_node == dst: # skewed_node_pair_indices[skewed_node_pair_iter,skewed_node_iter] = matrix_iter # skewed_node_pair_iter+=1 # matrix_iter += 1 # if print_data: # print('Skewed node pair indices:\n{}'.format(skewed_node_pair_indices)) # # find prob of each skewed node pair being chosen # probs_per_skewed_pair = np.zeros(num_skewed_nodes) # for node in range(num_skewed_nodes): # probs_per_skewed_pair[node] = skewed_node_probs[node] / pairs_per_node # # update prob pair chosen for each pair with a skewed node # prob_pair_chosen = np.zeros(num_pairs) # iter = np.nditer(skewed_node_pair_indices) # for skewed_node_iter in range(num_skewed_nodes): # for skewed_node_pair_iter in range(pairs_per_node): # for pair in range(num_pairs): # if pair == skewed_node_pair_indices[skewed_node_pair_iter,skewed_node_iter]: # # add to skew of node # prob_pair_chosen[pair] += probs_per_skewed_pair[skewed_node_iter] # else: # continue # # will allocate twice # prob_pair_chosen = prob_pair_chosen / 2 # total_skew_prob = np.sum(prob_pair_chosen) # # assign prob pair chosen to any pairs w/o skewed nodes # if total_skew_prob < 0.5: # remaining_pairs = np.count_nonzero(prob_pair_chosen == 0) # prob_dist = np.ones((remaining_pairs))/((remaining_pairs)) # counter_array = np.random.multinomial(500, # prob_dist, # size=1)[0] # counter_array_prob_dist = (counter_array/1000) # counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist # iter = np.nditer(counter_array_prob_dist) # for pair in range(len(prob_pair_chosen)): # if prob_pair_chosen[pair] == 0: # 
prob_pair_chosen[pair] = next(iter) # if print_data: # print('Prob pair chosen:\n{}'.format(prob_pair_chosen)) # # assign probabilites to normalised demand matrix # node_dist = assign_probs_to_matrix(eps=eps, # probs=prob_pair_chosen, # matrix=node_dist) # if rack_prob_config is not None: # # adjust node prob dist to account for rack prob config # node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config, # eps, # node_dist, # print_data=print_data) # matrix_sum = np.round(np.sum(node_dist),2) # assert matrix_sum == 1, \ # 'matrix must sum to 1, but is {}'.format(matrix_sum) # if print_data: # print('Normalised matrix:\n{}'.format(node_dist)) # print('Normalised matrix sum: {}'.format(matrix_sum)) # if path_to_save is not None: # tools.pickle_data(path, node_dist) # if plot_fig or show_fig: # fig = plot_dists.plot_node_dist(node_dist=node_dist, # eps=eps, # show_fig=show_fig) # return node_dist, fig # else: # return node_dist
def get_suitable_destination_node_for_rack_config(sn, node_dist, eps, ep_to_rack, rack_to_ep, inter_rack):
    '''Given source node, finds destination node given inter and intra rack config.

    Args:
        sn: Source endpoint.
        node_dist: Node distribution forwarded unchanged to gen_demand_nodes.
        eps (list): Not used by this function; kept so the signature stays
            the same for existing callers.
        ep_to_rack (dict): Maps each endpoint to the label of its rack.
        rack_to_ep (dict): Maps each rack label to the list of endpoints in
            that rack.
        inter_rack (bool): If True, the destination is sampled from the
            endpoints of every rack other than the source's rack. Otherwise
            it is sampled from the endpoints of the source's own rack.

    Returns:
        The sampled destination endpoint, which always differs from sn.

    Raises:
        ValueError: If there is no candidate destination other than sn
            itself (e.g. an intra-rack request for a rack that holds only
            sn, or an inter-rack request when there are no other racks).
    '''
    sn_rack = ep_to_rack[sn]

    # gather the endpoints of all racks that qualify as destinations
    if inter_rack:
        dn_candidates = [ep
                         for rack, rack_eps in rack_to_ep.items()
                         if rack != sn_rack
                         for ep in rack_eps]
    else:
        dn_candidates = [ep
                         for rack, rack_eps in rack_to_ep.items()
                         if rack == sn_rack
                         for ep in rack_eps]

    # without this guard the resampling loop below could never produce a
    # destination different from sn (assumes gen_demand_nodes only returns
    # members of the eps it is given -- TODO confirm)
    if all(ep == sn for ep in dn_candidates):
        rack_kind = 'inter' if inter_rack else 'intra'
        raise ValueError('No suitable {}-rack destination node available '
                         'for source node {}.'.format(rack_kind, sn))

    # resample until the destination differs from the source
    dn = sn
    while dn == sn:
        dn = gen_demand_nodes(eps=dn_candidates,
                              node_dist=node_dist,
                              size=1,
                              axis=1,
                              check_sum_valid=False)[0]

    return dn
def get_inter_intra_rack_pair_prob_dicts(pair_prob_dict, ep_to_rack_dict):
    '''Splits a pair-probability dict into inter-rack and intra-rack dicts.

    Args:
        pair_prob_dict (dict): Maps JSON-encoded [src, dst] endpoint pairs to
            their probabilities.
        ep_to_rack_dict (dict): Maps each endpoint to the label of its rack.

    Returns:
        tuple: Tuple containing:
            - **inter_rack_pair_prob_dict** (*dict*): Entries of
              pair_prob_dict whose src and dst are in different racks.
            - **intra_rack_pair_prob_dict** (*dict*): Entries of
              pair_prob_dict whose src and dst are in the same rack.

    Raises:
        KeyError: If an endpoint of a pair is missing from ep_to_rack_dict.
    '''
    inter_rack_pair_prob_dict = {}
    intra_rack_pair_prob_dict = {}

    # keys come straight from pair_prob_dict, so the probability lookup can
    # never miss and no switched src-dst fallback is required
    for pair, prob in pair_prob_dict.items():
        pair_loaded = json.loads(pair)
        src, dst = pair_loaded[0], pair_loaded[1]
        if ep_to_rack_dict[src] == ep_to_rack_dict[dst]:
            intra_rack_pair_prob_dict[pair] = prob
        else:
            inter_rack_pair_prob_dict[pair] = prob

    return inter_rack_pair_prob_dict, intra_rack_pair_prob_dict
def _shift_rack_pair_probs(rack_pair_prob_dict, diff_per_pair):
    '''Adds diff_per_pair to every pair probability in the dict, in place.

    Whenever the shift would push a pair below 0, that pair is left as it is
    and the shift is instead applied to the pair that currently has the
    highest probability (which is floored at 0).
    '''
    for pair in rack_pair_prob_dict.keys():
        updated_prob = rack_pair_prob_dict[pair] + diff_per_pair
        if not updated_prob < 0:
            # wont have negative prob, can apply update
            rack_pair_prob_dict[pair] = updated_prob
            continue

        # cant have negative probs, take away from max prob instead
        highest_prob_pair = max(rack_pair_prob_dict, key=rack_pair_prob_dict.get)
        rack_pair_prob_dict[highest_prob_pair] += diff_per_pair
        if rack_pair_prob_dict[highest_prob_pair] < 0:
            rack_pair_prob_dict[highest_prob_pair] = 0


def adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                          eps,
                                          node_dist,
                                          print_data=False):
    '''Deterministically adjusts a node dist to meet an inter-/intra-rack config.

    Unlike the other adjust_node_dist_from_multinomial_exp_for_rack_prob_config
    function, this function does not use a multinomial experiment to adjust
    the prob dist, but rather shifts the probabilities of the original node
    distribution: the gap between the current and the target inter-rack
    probability is spread evenly over the inter-rack pairs, and the opposite
    amount is spread evenly over the intra-rack pairs.

    Args:
        rack_prob_config (dict): Must contain a 'racks_dict' key (rack label
            -> list of the endpoints in that rack) and a 'prob_inter_rack'
            key (target inter-rack probability).
        eps (list): List of network node endpoints.
        node_dist (numpy array): 2D matrix of source-destination pair
            probabilities. Passed as the matrix argument of
            assign_probs_to_matrix, which writes into it.
        print_data (bool): Whether or not to print the intermediate
            probabilities.

    Returns:
        numpy array: The adjusted node distribution matrix.
    '''
    # NOTE(review): the results of these two calls are not used below
    tools.get_network_params(eps)
    get_network_pair_mapper(eps)

    # invert racks_dict; the first rack listed for an endpoint wins
    ep_to_rack_dict = {}
    for rack, rack_eps in rack_prob_config['racks_dict'].items():
        for ep in rack_eps:
            ep_to_rack_dict.setdefault(ep, rack)

    pair_prob_dict = get_pair_prob_dict_of_node_dist_matrix(node_dist, eps)
    inter_probs, intra_probs = get_inter_intra_rack_pair_prob_dicts(pair_prob_dict, ep_to_rack_dict)

    # get current inter intra rack probs
    inter_rack_prob = sum(inter_probs.values())
    intra_rack_prob = sum(intra_probs.values())
    if print_data:
        print('inter_rack_prob: {}'.format(inter_rack_prob))
        print('intra_rack_prob: {}'.format(intra_rack_prob))
        print('sum: {}'.format(inter_rack_prob+intra_rack_prob))

    target_inter_rack_prob = rack_prob_config['prob_inter_rack'] / 2  # allocate 2x so divide by 2
    diff_inter_rack_prob = target_inter_rack_prob - inter_rack_prob
    # NOTE(review): if there are no inter-rack or no intra-rack pairs these
    # are divisions by zero -- confirm callers guarantee both groups exist
    diff_per_inter_rack_pair = diff_inter_rack_prob / len(inter_probs)
    diff_per_intra_rack_pair = -(diff_inter_rack_prob / len(intra_probs))
    if print_data:
        print('target_inter_rack_prob: {}'.format(target_inter_rack_prob))
        print('diff_inter_rack_prob: {}'.format(diff_inter_rack_prob))
        print('diff_per_inter_rack_pair: {}'.format(diff_per_inter_rack_pair))
        print('diff_per_intra_rack_pair: {}'.format(diff_per_intra_rack_pair))

    # adjust probs so have desired inter intra rack probs
    _shift_rack_pair_probs(inter_probs, diff_per_inter_rack_pair)
    _shift_rack_pair_probs(intra_probs, diff_per_intra_rack_pair)

    if print_data:
        inter_rack_prob = sum(inter_probs.values())
        intra_rack_prob = sum(intra_probs.values())
        print('inter_rack_prob: {}'.format(inter_rack_prob))
        print('intra_rack_prob: {}'.format(intra_rack_prob))
        print('sum: {}'.format(inter_rack_prob+intra_rack_prob))

    # create new adjusted pair_prob_dict (inter-rack pairs first)
    adjusted_pair_prob_dict = dict(inter_probs)
    adjusted_pair_prob_dict.update(intra_probs)

    # correct any minor errors in python floating point arithmetic
    adjusted_pair_prob_dict = adjust_probability_dict_sum(adjusted_pair_prob_dict, target_sum=0.5)
    if print_data:
        print('adjusted sum: {}'.format(sum(adjusted_pair_prob_dict.values())))

    # assign to create adjusted prob matrix
    return assign_probs_to_matrix(eps=eps,
                                  probs=adjusted_pair_prob_dict,
                                  matrix=node_dist)
def convert_sampled_pairs_into_node_dist(sampled_pairs, eps):
    '''Converts a dict of sampled pair occurrences into a node dist matrix.

    Args:
        sampled_pairs (dict): Keys are JSON-encoded [src, dst] pairs; the
            keys and values are handed to
            val_dists.convert_key_occurrences_to_data.
        eps (list): List of network node endpoints.

    Returns:
        numpy array: 2D matrix produced by assign_probs_to_matrix from the
        sampled pair probabilities. Pairs that were never sampled get 0.
    '''
    # convert sampled pairs dict to rand vars, then into a prob dist
    pair_samples = val_dists.convert_key_occurrences_to_data(list(sampled_pairs.keys()),
                                                             list(sampled_pairs.values()))
    unique_vals, pmf = val_dists.gen_discrete_prob_dist(pair_samples)
    sampled_pair_prob_dist = dict(zip(unique_vals, pmf))

    # insert 0 probabilities for any pairs that were never sampled
    _, num_pairs, _, _ = tools.get_network_params(eps)
    index_to_pair, pair_to_index = get_network_pair_mapper(eps)
    if num_pairs != len(sampled_pair_prob_dist):
        # some pairs were never chosen, find which are missing
        chosen_pair_indices = set()
        for pair in sampled_pair_prob_dist.keys():
            try:
                chosen_pair_indices.add(pair_to_index[pair])
            except KeyError:
                # pair is stored with src and dst switched
                loaded = json.loads(pair)
                switched = json.dumps([loaded[1], loaded[0]])
                chosen_pair_indices.add(pair_to_index[switched])
        for index in index_to_pair.keys():
            if index not in chosen_pair_indices:
                # index is missing, give this pair a 0 prob
                sampled_pair_prob_dist[json.dumps(index_to_pair[index])] = 0

    # convert prob dist into dict whose keys are node dist matrix indices
    matrix_pair_prob_dist = convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(sampled_pair_prob_dist, eps)

    # sorted by pair index so the probs are in the order in which they get
    # assigned to the node matrix; halved since one side of diagonal
    prob_pair_chosen = [matrix_pair_prob_dist[key] / 2
                        for key in sorted(matrix_pair_prob_dist.keys())]

    return assign_probs_to_matrix(eps=eps, probs=prob_pair_chosen)
def convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(pair_prob_dist, eps):
    '''Re-keys a pair prob dist by the pairs' indices from get_network_pair_mapper.

    Args:
        pair_prob_dist (dict): Dict whose keys are JSON-encoded node pairs
            and whose values are probabilities or fractions.
        eps (list): List of network node endpoints.

    Returns:
        matrix_pair_prob_dist (dict): Dict whose keys are the pair indices
        of the node pairs and whose values are the pairs' corresponding
        probabilities or fractions.
    '''
    _, pair_to_index = get_network_pair_mapper(eps)

    def switch_src_dst(pair_key):
        # re-encode a JSON pair key with its src and dst swapped
        loaded = json.loads(pair_key)
        return json.dumps([loaded[1], loaded[0]])

    matrix_pair_prob_dist = {}
    for pair_key in pair_prob_dist.keys():
        # the pair mapper may hold the pair in the switched orientation
        try:
            matrix_index = pair_to_index[pair_key]
        except KeyError:
            pair_key = switch_src_dst(pair_key)
            matrix_index = pair_to_index[pair_key]

        # the (possibly switched) key is tried first for the value lookup,
        # switching back only if pair_prob_dist does not contain it
        try:
            prob = pair_prob_dist[pair_key]
        except KeyError:
            prob = pair_prob_dist[switch_src_dst(pair_key)]
        matrix_pair_prob_dist[matrix_index] = prob

    return matrix_pair_prob_dist
def adjust_node_dist_from_multinomial_exp_for_rack_prob_config(rack_prob_config,
                                                               eps,
                                                               node_dist,
                                                               num_exps_factor=2,
                                                               print_data=False):
    '''Adjusts a node dist for a rack config by sampling from the initial dist.

    Unlike the other adjust_node_dist_for_rack_prob_config function, this
    function adjusts the node dist by running multinomial experiments on the
    initial node distribution to sample from it. It therefore takes much much
    longer than the other function, especially for networks with >1,000 nodes.

    Takes node dist and uses it to generate new node dist given inter- and
    intra-rack configuration. Different DCNs have different inter and intra
    rack traffic. This function allows you to specify how much of your traffic
    should be inter and intra rack.

    Args:
        rack_prob_config (dict): Must have a `racks_dict' key, whose value is a
            dict with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and
            whose value for each key is a list of the endpoints in the
            respective rack (e.g. [`server_0', `server_24', ...]), and a
            `prob_inter_rack' key whose value is a float (e.g. 0.9), setting
            the probability that a chosen src endpoint has a destination which
            is outside of its rack.
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        node_dist (numpy array): 2D matrix array of source-destination pair
            probabilities of being chosen. A deep copy is taken; the argument
            itself is not modified by this function.
        num_exps_factor (int): Factor by which to multiply number of ep pairs
            to get the number of multinomial experiments to run when generating
            new node dist.
        print_data (bool): Whether or not to print extra information about the
            generated data.

    Returns:
        The value returned by assign_probs_to_matrix when the sampled pair
        probabilities are written into the copied node dist.

    '''
    node_dist = copy.deepcopy(node_dist)

    # switch racks_dict keys and values to make hashing easier
    # (an endpoint listed in several racks keeps the first rack seen)
    racks_dict = {}
    for rack, rack_eps in rack_prob_config['racks_dict'].items():
        for ep in rack_eps:
            racks_dict.setdefault(ep, rack)

    # run multinomial exp to incorporate rack probabilities.
    # n**2 - n is always even, so integer division is exact (np.int no longer
    # exists in recent NumPy releases).
    num_pairs = (len(eps)**2 - len(eps)) // 2
    num_experiments = num_exps_factor * num_pairs
    prob_inter_rack = rack_prob_config['prob_inter_rack']
    sampled_pairs = {}
    inter_counter = 0
    intra_counter = 0
    for _ in range(num_experiments):
        # sample if connection should be intra or inter rack
        inter_rack = np.random.choice(a=[True, False],
                                      p=[prob_inter_rack, 1-prob_inter_rack])
        if inter_rack:
            inter_counter += 1
        else:
            intra_counter += 1

        # sample a source node
        sn = gen_demand_nodes(eps=eps,
                              node_dist=node_dist,
                              size=1,
                              axis=0,
                              check_sum_valid=False)[0]

        # sample destination node given inter_rack config
        dn = get_suitable_destination_node_for_rack_config(sn,
                                                           copy.deepcopy(node_dist),
                                                           eps,
                                                           ep_to_rack=racks_dict,
                                                           rack_to_ep=rack_prob_config['racks_dict'],
                                                           inter_rack=inter_rack)

        # count the occurrence under whichever orientation was seen first
        pair = json.dumps([sn, dn])
        if pair in sampled_pairs:
            sampled_pairs[pair] += 1
        else:
            pair_switched = json.dumps([dn, sn])
            if pair_switched in sampled_pairs:
                sampled_pairs[pair_switched] += 1
            else:
                sampled_pairs[pair] = 1 # init first occurrence of pair

    # convert sampled pairs dict to rand vars
    sampled_pairs = val_dists.convert_key_occurrences_to_data(list(sampled_pairs.keys()),
                                                              list(sampled_pairs.values()))
    if print_data:
        total = inter_counter + intra_counter
        print('Number inter-rack requests: {} ({}%)'.format(inter_counter, inter_counter*100/total))
        print('Number intra-rack requests: {} ({}%)'.format(intra_counter, intra_counter*100/total))

    # convert sampled pairs list into prob dist
    unique_vals, pmf = val_dists.gen_discrete_prob_dist(sampled_pairs)
    sampled_pair_prob_dist = {unique_var: prob for unique_var, prob in zip(unique_vals, pmf)}

    # insert 0 probabilites for any pairs that were never sampled in multinomial exp
    num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
    index_to_pair, pair_to_index = get_network_pair_mapper(eps)
    if num_pairs != len(sampled_pair_prob_dist.keys()):
        # some pairs were never chosen, find which are missing
        chosen_pair_indices = set()
        for pair in sampled_pair_prob_dist.keys():
            if pair not in pair_to_index:
                # sampled with src and dst the other way round
                decoded = json.loads(pair)
                pair = json.dumps([decoded[1], decoded[0]])
            chosen_pair_indices.add(pair_to_index[pair])
        for index, missing_pair in index_to_pair.items():
            if index not in chosen_pair_indices:
                # index is missing, update sampled pairs dict with 0 prob for this index
                sampled_pair_prob_dist[json.dumps(missing_pair)] = 0

    # convert prob dist into dict whose keys are node dist matrix indices
    matrix_pair_prob_dist = convert_pair_prob_dist_dict_to_matrix_pair_prob_dist_dict(sampled_pair_prob_dist, eps)

    # sort so that get correct ordering of pair probs when assign to node matrix,
    # and divide by 2 since each prob fills one side of the diagonal
    prob_pair_chosen = [matrix_pair_prob_dist[key]/2 for key in sorted(matrix_pair_prob_dist.keys())]

    if print_data:
        print('Prob pair chosen after accounting for rack prob config:\n{}'.format(prob_pair_chosen))
        print(len(prob_pair_chosen))

    # generate new node matrix prob dist (which now accounts for rack probs)
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=prob_pair_chosen,
                                       matrix=node_dist)

    return node_dist
def get_network_pair_mapper(eps):
    '''Gets dicts mapping network endpoint pairs to and from pair indices.

    Pairs are enumerated by looping src over eps then dst over eps, keeping
    only pairs where src != dst and the src node index is not greater than the
    dst node index (i.e. one side of the node dist matrix diagonal).

    Args:
        eps (list): List of network node endpoints.

    Returns:
        tuple: Tuple containing:
            - **index_to_pair** (*dict*): Maps pair index -> [src, dst] list.
            - **pair_to_index** (*dict*): Maps JSON-encoded [src, dst] string
              -> pair index.

    '''
    _, _, node_to_index, _ = tools.get_network_params(eps)

    # lazily yield the retained pairs in src-major order
    kept_pairs = (
        [src, dst]
        for src in eps
        for dst in eps
        if not (src == dst or node_to_index[src] > node_to_index[dst])
    )

    index_to_pair = {}
    pair_to_index = {}
    for pair_index, pair in enumerate(kept_pairs):
        index_to_pair[pair_index] = pair
        pair_to_index[json.dumps(pair)] = pair_index

    return index_to_pair, pair_to_index
def gen_multimodal_node_pair_dist(eps,
                                  skewed_pairs=None,
                                  skewed_pair_probs=None,
                                  num_skewed_pairs=None,
                                  rack_prob_config=None,
                                  path_to_save=None,
                                  plot_fig=False,
                                  show_fig=False,
                                  print_data=False):
    '''Generates a multimodal node pair distribution.

    Generates a multimodal node pair demand distribution i.e. certain node
    pairs have a certain specified probability of being chosen. If no
    skewed pairs given, randomly selects pairs to skew. If no skew
    pair probabilities given, randomly selects each pair's probability
    uniformly between 0.3/num_skewed_pairs and 0.5/num_skewed_pairs. If no
    num skewed pairs given, randomly chooses number of pairs to skew.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        skewed_pairs (list of lists): List of the node pairs [src,dst] to
            skew. None or empty means pairs are chosen randomly. The list
            passed in is never modified.
        skewed_pair_probs (list): Probabilities of node pairs being
            chosen. None or empty means probabilities are chosen randomly.
        num_skewed_pairs (int): Number of pairs to randomly skew.
        rack_prob_config (dict): If not None, the generated node dist is
            passed through adjust_node_dist_for_rack_prob_config. Dict should
            have a `racks_dict' key, whose value is a dict with keys as rack
            labels (e.g. 'rack_0', 'rack_1' etc.) and whose value for each key
            is a list of the endpoints in the respective rack, and a
            `prob_inter_rack' key whose value is a float (e.g. 0.9), setting
            the probability that a chosen src endpoint has a destination which
            is outside of its rack.
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
        plot_fig (bool): Whether or not to plot fig. If True, will return fig.
        show_fig (bool): Whether or not to plot and show fig. If True, will
            return and display fig.
        print_data (bool): Whether or not to print extra information about the
            generated data.

    Returns:
        tuple: Tuple containing:
            - **node_dist** (*numpy array*): 2D matrix array of souce-destination pair
              probabilities of being chosen.
            - **fig** (*matplotlib.figure.Figure, optional*): Node distributions
              plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True.

    Raises:
        AssertionError: If the final matrix does not sum to 1 (to 2 d.p.), or
            if skewed_pairs is given but contains no list.
        KeyError: If a given skewed pair is not a valid pair of eps.

    '''
    # work on copies: a mutable default (or the caller's own list) must not
    # accumulate the randomly chosen pairs across calls
    skewed_pairs = [] if skewed_pairs is None else list(skewed_pairs)
    if skewed_pair_probs is None:
        skewed_pair_probs = []

    # initialise graph params
    num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps)
    _, pair_to_index = get_network_pair_mapper(eps)
    node_dist = np.zeros((num_nodes, num_nodes))

    if num_skewed_pairs is None:
        num_skewed_pairs = len(skewed_pairs)

    if len(skewed_pairs) == 0:
        if num_skewed_pairs == 0:
            # randomly choose number of nodes to skew
            min_skews = int(0.1*num_pairs)
            max_skews = int(0.3*num_pairs)
            if min_skews < 1:
                min_skews = 1
                max_skews = 2
            num_skewed_pairs = np.random.randint(min_skews, max_skews+1)

        # randomly choose src and dst for pairs
        nodes = eps
        src = np.random.choice(nodes, size=num_skewed_pairs)
        dst = np.random.choice(nodes, size=num_skewed_pairs)

        # remove src-dst conflicts and repeated pairs, then add to skewed pairs list
        for idx in range(num_skewed_pairs):
            sn, dn = src[idx], dst[idx]
            pair = [sn, dn]
            pair_flipped = [dn, sn]
            while sn == dn or pair in skewed_pairs or pair_flipped in skewed_pairs:
                dn = np.random.choice(nodes, size=1)[0]
                pair = [sn, dn]
                # must be the reversed pair, otherwise a re-drawn dst could
                # duplicate an already chosen pair in the opposite direction
                pair_flipped = [dn, sn]
            dst[idx] = dn
            skewed_pairs.append([sn, dn])

        # keep src<dst convention consistent
        for pair_iter, pair in enumerate(skewed_pairs[:num_skewed_pairs]):
            if node_to_index[pair[0]] > node_to_index[pair[1]]:
                # swap src and dst to keep convention consistent
                skewed_pairs[pair_iter] = [pair[1], pair[0]]

        # update src and dst
        for pair_iter in range(len(src)):
            src[pair_iter] = skewed_pairs[pair_iter][0]
            dst[pair_iter] = skewed_pairs[pair_iter][1]
    else:
        # skewed pairs already given
        assert (any(isinstance(pair,list) for pair in skewed_pairs)), \
            'must enter skewed_pairs as list of lists'
        src = [pair[0] for pair in skewed_pairs]
        dst = [pair[1] for pair in skewed_pairs]

    if print_data:
        print('Chosen sources:\n{}'.format(src))
        print('Chosen destinations:\n{}'.format(dst))
        print('Num skewed pairs: {}'.format(num_skewed_pairs))
        print('Chosen pairs to skew:\n{}'.format(skewed_pairs))

    if len(skewed_pair_probs) == 0:
        # randomly choose skew between range
        min_prob = 0.3/num_skewed_pairs
        max_prob = 0.5/num_skewed_pairs
        skewed_pair_probs = list(np.random.uniform(min_prob,
                                                   max_prob,
                                                   size=num_skewed_pairs))
    if print_data:
        print('Skew probs:\n{}'.format(skewed_pair_probs))

    # every pair starts at 0; key order matches get_network_pair_mapper
    prob_pair_chosen = {pair: 0 for pair in pair_to_index.keys()}

    # ensure each skewed pair uses the same src-dst orientation as the mapper
    skewed_pair_keys = []
    for pair in skewed_pairs:
        key = json.dumps(pair)
        if key not in prob_pair_chosen:
            key = json.dumps(pair[::-1])
        skewed_pair_keys.append(key)
    probs_per_skewed_pair = dict(zip(skewed_pair_keys, skewed_pair_probs))

    # update prob pair chosen for each skewed pair (halved since each pair
    # prob is written to both sides of the matrix diagonal)
    unskewed_pairs = {pair: 0 for pair in pair_to_index.keys()} # maintain for efficient hashing
    for skewed_pair, prob in probs_per_skewed_pair.items():
        # raises KeyError if the pair is not a valid pair of eps
        prob_pair_chosen[skewed_pair] += prob/2
        unskewed_pairs.pop(skewed_pair, None)
    total_skew_prob = np.sum(list(prob_pair_chosen.values()))

    # assign prob pair chosen to any pairs w/o skew. Count the unskewed pairs
    # directly so a skewed pair given probability 0 is not mistaken for one.
    if total_skew_prob < 0.5 and unskewed_pairs:
        num_remaining_pairs = len(unskewed_pairs)
        prob_dist = np.ones((num_remaining_pairs))/((num_remaining_pairs))
        num_experiments = num_remaining_pairs * 100
        counter_array = np.random.multinomial(num_experiments,
                                              prob_dist,
                                              size=1)[0]
        counter_array_prob_dist = (counter_array/(num_experiments*2))
        counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist
        for pair, prob in zip(unskewed_pairs.keys(), counter_array_prob_dist):
            prob_pair_chosen[pair] = prob

    if print_data:
        print('Prob pair chosen:\n{}'.format(prob_pair_chosen))

    # assign probabilites to normalised demand matrix
    node_dist = assign_probs_to_matrix(eps=eps,
                                       probs=list(prob_pair_chosen.values()),
                                       matrix=node_dist)

    if rack_prob_config is not None:
        # adjust node prob dist to account for rack prob config
        node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config,
                                                          eps,
                                                          node_dist,
                                                          print_data=print_data)

    matrix_sum = np.round(np.sum(node_dist),2)

    assert matrix_sum == 1, \
        'matrix must sum to 1, but is {}'.format(matrix_sum)

    if print_data:
        print('Normalised matrix:\n{}'.format(node_dist))
        print('Normalised matrix sum: {}'.format(matrix_sum))
    if path_to_save is not None:
        tools.pickle_data(path_to_save, node_dist)
    if plot_fig or show_fig:
        fig = plot_dists.plot_node_dist(node_dist=node_dist,
                                        eps=eps,
                                        show_fig=show_fig)
        return node_dist, fig
    else:
        return node_dist
# def gen_multimodal_node_pair_dist(eps, # skewed_pairs = [], # skewed_pair_probs = [], # num_skewed_pairs=None, # rack_prob_config=None, # path_to_save=None, # plot_fig=False, # show_fig=False, # print_data=False): # '''Generates a multimodal node pair distribution. # Generates a multimodal node pair demand distribution i.e. certain node # pairs have a certain specified probability of being chosen. If no # skewed pairs given, randomly selects pair to skew. If no skew # pair probabilities given, random selects probability with which # to skew the pair between 0.1 and 0.3. If no num skewed pairs given, # randomly chooses number of pairs to skew. # Args: # eps (list): List of network node endpoints that can act as sources # & destinations. # skewed_pairs (list of lists): List of the node pairs [src,dst] to # skew. # skewed_pair_probs (list): Probabilities of node pairs being # chosen. # num_skewed_pairs (int): Number of pairs to randomly skew. # rack_prob_config (dict): Network endpoints/servers are often grouped into # physically local clusters or `racks'. Different networks may have # different levels of inter- (between) and intra- (within) rack communication. # If rack_prob_config is left as None, will assume that server-server # srs-dst requests are independent of which rack they might be in. # If specified, dict should have a `racks_dict' key, whose value is a dict # with keys as rack labels (e.g. 'rack_0', 'rack_1' etc.) and whose value # for each key is a list of the endpoints in the respective rack # (e.g. [`server_0', `server_24', `server_56', ...]), and a `prob_inter_rack' # key whose value is a float (e.g. 0.9), setting the probability # that a chosen src endpoint has a destination which is outside of its rack. # If you want to e.g. designate an entire rack as a 'hot rack' (many # traffic requests occur from this rack), would specify skewed_nodes to # contain the list of servers in this rack and configure rack_prob_config # appropriately. 
# path_to_save (str): Path to directory (with file name included) in which # to save generated distribution. E.g. path_to_save='data/dists/my_dist'. # plot_fig (bool): Whether or not to plot fig. If True, will return fig. # show_fig (bool): Whether or not to plot and show fig. If True, will # return and display fig. # print_data (bool): Whether or not to print extra information about the # generated data. # Returns: # tuple: Tuple containing: # - **node_dist** (*numpy array*): 2D matrix array of souce-destination pair # probabilities of being chosen. # - **fig** (*matplotlib.figure.Figure, optional*): Node distributions # plotted as a 2D matrix. To return, set show_fig=True and/or plot_fig=True. # ''' # # initialise graph params # num_nodes, num_pairs, node_to_index, index_to_node = tools.get_network_params(eps) # node_dist = np.zeros((num_nodes, num_nodes)) # if num_skewed_pairs is None: # num_skewed_pairs = len(skewed_pairs) # if len(skewed_pairs) == 0: # if num_skewed_pairs == 0: # #randomly choose number of nodes to skew # min_skews = int(0.1*num_pairs) # max_skews = int(0.3*num_pairs) # if min_skews < 1: # min_skews = 1 # max_skews = 2 # num_skewed_pairs = np.random.randint(min_skews, max_skews+1) # # randomly choose src and dst for pairs # nodes = eps # src = np.random.choice(nodes, size=num_skewed_pairs) # dst = np.random.choice(nodes, size=num_skewed_pairs) # # remove src-dst conflicts and repeated pairs # for pair_iter in range(num_skewed_pairs): # try: # pair = skewed_pairs[pair_iter] # except IndexError: # pair = [] # while (skewed_pairs.count(pair) > 1 or src[pair_iter] == dst[pair_iter]): # # remove repeated pairs and src-dst conflicts # dst[pair_iter] = np.random.choice(nodes, size=1)[0] # skewed_pairs.append([src[pair_iter],dst[pair_iter]]) # # keep src<dst convention consistent # for pair_iter in range(num_skewed_pairs): # pair = skewed_pairs[pair_iter] # if pair[0] > pair[1]: # # swap src and dst to keep convention consistent # temp_src = pair[1] # 
temp_dst = pair[0] # skewed_pairs[pair_iter] = [temp_src,temp_dst] # # update src and dst # for pair in range(len(src)): # src[pair] = skewed_pairs[pair][0] # dst[pair] = skewed_pairs[pair][1] # else: # assert (any(isinstance(pair,list) for pair in skewed_pairs)), \ # 'must enter skewed_pairs as list of lists' # src = [pair[0] for pair in skewed_pairs] # dst = [pair[1] for pair in skewed_pairs] # if print_data: # print('Chosen sources:\n{}'.format(src)) # print('Chosen destinations:\n{}'.format(dst)) # print('Num skewed pairs: {}'.format(num_skewed_pairs)) # print('Chosen pairs to skew:\n{}'.format(skewed_pairs)) # if len(skewed_pair_probs) == 0: # # randomly choose skew between range # min_prob = 0.3/num_skewed_pairs # max_prob = 0.5/num_skewed_pairs # skewed_pair_probs = np.random.uniform(min_prob, # max_prob, # size=num_skewed_pairs) # skewed_pair_probs = list(skewed_pair_probs) # if print_data: # print('Skew probs:\n{}'.format(skewed_pair_probs)) # # get indices of node pairs to skew # skewed_pair_indices = np.zeros(len(skewed_pairs)) # skewed_pair_iter = 0 # for pair in range(len(skewed_pairs)): # matrix_iter = 0 # for src in eps: # for dst in eps: # if src == dst: # continue # elif src > dst: # continue # else: # if skewed_pairs[pair][0] == src and skewed_pairs[pair][1] == dst: # skewed_pair_indices[skewed_pair_iter] = matrix_iter # skewed_pair_iter += 1 # elif skewed_pairs[pair][0] == dst and skewed_pairs[pair][1] == src: # skewed_pair_indices[skewed_pair_iter] = matrix_iter # skewed_pair_iter += 1 # matrix_iter += 1 # if print_data: # print('Skew indices:\n{}'.format(skewed_pair_indices)) # # update prob pair chosen for each skewed pair # prob_pair_chosen = np.zeros(num_pairs) # iter = np.nditer(np.asarray(skewed_pair_probs)) # for skewed_pair_iter in range(num_skewed_pairs): # for pair_iter in range(num_pairs): # if pair_iter == skewed_pair_indices[skewed_pair_iter]: # # add to skew of pair # prob = next(iter) # prob_pair_chosen[pair_iter] += prob # else: 
# continue # # will allocate twice # prob_pair_chosen = prob_pair_chosen / 2 # total_skew_prob = np.sum(prob_pair_chosen) # # assign prob pair chosen to any pairs w/o skew # if total_skew_prob < 0.5: # remaining_pairs = np.count_nonzero(prob_pair_chosen == 0) # prob_dist = np.ones((remaining_pairs))/((remaining_pairs)) # counter_array = np.random.multinomial(500, # prob_dist, # size=1)[0] # counter_array_prob_dist = (counter_array/1000) # counter_array_prob_dist = ((0.5-total_skew_prob)/0.5) * counter_array_prob_dist # iter = np.nditer(counter_array_prob_dist) # for pair in range(len(prob_pair_chosen)): # if prob_pair_chosen[pair] == 0: # prob_pair_chosen[pair] = next(iter) # if print_data: # print('Prob pair chosen:\n{}'.format(prob_pair_chosen)) # # assign probabilites to normalised demand matrix # node_dist = assign_probs_to_matrix(eps=eps, # probs=prob_pair_chosen, # matrix=node_dist) # if rack_prob_config is not None: # # adjust node prob dist to account for rack prob config # node_dist = adjust_node_dist_for_rack_prob_config(rack_prob_config, # eps, # node_dist, # print_data=print_data) # matrix_sum = np.round(np.sum(node_dist),2) # assert matrix_sum == 1, \ # 'matrix must sum to 1, but is {}'.format(matrix_sum) # if print_data: # print('Normalised matrix:\n{}'.format(node_dist)) # print('Normalised matrix sum: {}'.format(matrix_sum)) # if path_to_save is not None: # tools.pickle_data(path, node_dist) # if plot_fig or show_fig: # fig = plot_dists.plot_node_dist(node_dist=node_dist, # eps=eps, # show_fig=show_fig) # return node_dist, fig # else: # return node_dist
def gen_node_demands(eps,
                     node_dist,
                     num_demands,
                     rack_prob_config=None,
                     duplicate=False,
                     path_to_save=None):
    '''Uses node distribution to generate src-dst node pair demands.

    Args:
        eps (list): List of network node endpoints that can act as sources
            & destinations.
        node_dist (numpy array): 2D matrix array of source-destination pair
            probabilities of being chosen.
        num_demands (int): Number of src-dst node pairs to generate.
        rack_prob_config (dict): If not None, every destination is re-sampled
            with get_suitable_destination_node_for_rack_config. Dict must have
            a `racks_dict' key (rack label -> list of endpoints in that rack)
            and a `prob_inter_rack' key (probability that a request is
            inter-rack).
        duplicate (bool): Whether or not to duplicate src-dst node pairs. Use
            this is demands you're generating have a 'take down' event as well
            as an 'establish' event. If True, the returned arrays have length
            2*num_demands with the second half a copy of the first.
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.

    Returns:
        tuple: Tuple containing:
            - **sn** (*numpy array*): Selected source nodes.
            - **dn** (*numpy array*): Selected destination nodes.

    '''
    matrix_sum = np.round(np.sum(node_dist),2)
    assert matrix_sum == 1, \
        'demand dist matrix must sum to 1, but is {}'.format(matrix_sum)

    # init (second half is only present when duplicating)
    array_len = 2*num_demands if duplicate else num_demands
    sn = np.array(np.zeros((array_len)), dtype=object)
    dn = np.array(np.zeros((array_len)), dtype=object)

    # source nodes, then destination nodes
    sn[:num_demands] = gen_demand_nodes(eps=eps,
                                        node_dist=node_dist,
                                        size=num_demands,
                                        axis=0)
    dn[:num_demands] = gen_demand_nodes(eps=eps,
                                        node_dist=node_dist,
                                        size=num_demands,
                                        axis=1)

    # remove any src-dst conflicts by re-drawing the destination
    for request in np.arange(num_demands):
        while sn[request] == dn[request]:
            dn[request] = gen_demand_nodes(eps=eps,
                                           node_dist=node_dist,
                                           size=1,
                                           axis=1,
                                           check_sum_valid=False)[0]

    if rack_prob_config is not None:
        # adjust for rack config
        rack_to_ep = rack_prob_config['racks_dict']
        prob_inter_rack = rack_prob_config['prob_inter_rack']

        # invert rack -> endpoints into endpoint -> rack (first rack seen wins)
        ep_to_rack = {}
        for rack, rack_eps in rack_to_ep.items():
            for ep in rack_eps:
                ep_to_rack.setdefault(ep, rack)

        for request in np.arange(num_demands):
            # sample if connection should be intra or inter rack
            inter_rack = np.random.choice(a=[True, False],
                                          p=[prob_inter_rack, 1-prob_inter_rack])
            # sample destination node given inter_rack config
            dn[request] = get_suitable_destination_node_for_rack_config(sn[request],
                                                                        node_dist,
                                                                        eps,
                                                                        ep_to_rack=ep_to_rack,
                                                                        rack_to_ep=rack_to_ep,
                                                                        inter_rack=inter_rack)

    if duplicate:
        # mirror the finished first half into the second half
        sn[num_demands:] = sn[:num_demands]
        dn[num_demands:] = dn[:num_demands]

    if path_to_save is not None:
        tools.pickle_data(path_to_save, {'sn': sn, 'dn': dn})

    return sn, dn
def gen_demand_nodes(eps,
                     node_dist,
                     size,
                     axis,
                     path_to_save=None,
                     check_sum_valid=True):
    '''Generates demand nodes following the node_dist distribution

    The node dist is summed along `axis` to get a per-node probability, which
    is passed through adjust_probability_array_sum (target sum of 1) before
    being used to sample from eps.

    Args:
        eps (list): List of node endpoint labels.
        node_dist (numpy array): Probability distribution each node is chosen
        size (int): Number of demand nodes to generate
        axis (int, 1 or 0): Which axis of normalised node distribution to consider.
            E.g. If generating src nodes, axis=0. If dst nodes, axis=1
        path_to_save (str): Path to directory (with file name included) in which
            to save generated distribution. E.g. path_to_save='data/dists/my_dist'.
        check_sum_valid (bool): Whether or not to ensure node dist sums to 1.
            If need efficiency, should set to False.

    Returns:
        numpy array: Object-dtype array of the sampled nodes.

    '''
    if check_sum_valid:
        matrix_sum = np.round(np.sum(node_dist),2)
        assert matrix_sum == 1, \
            'matrix must sum to 1, but is {}'.format(matrix_sum)

    per_node_probs = np.sum(node_dist, axis=axis)
    if len(per_node_probs) != len(eps):
        # must index node dist so it only covers the given eps
        _, _, node_to_index, _ = tools.get_network_params(eps)
        node_dist = node_dist[[node_to_index[ep] for ep in eps]]
        per_node_probs = np.sum(node_dist, axis=axis)

    # make sure sums to 1 (floating point arithmetic can leave the sum slightly
    # off, and the indexing above can change it)
    probs = adjust_probability_array_sum(per_node_probs, target_sum=1, print_data=False)

    nodes = np.random.choice(a=eps, size=size, p=probs).astype(object)

    if path_to_save is not None:
        tools.pickle_data(path_to_save, nodes)

    return nodes
def adjust_probability_array_sum(probs, target_sum=1, print_data=False):
    '''Shifts every entry of an array by the same amount so it sums to target_sum.

    The difference between the current sum and target_sum is split equally
    across the len(probs) entries and subtracted from each one. The input is
    never modified; a copy is adjusted and returned.

    Args:
        probs (list or numpy array): Probabilities to adjust.
        target_sum (int or float): Value the returned entries should sum to.
        print_data (bool): Whether or not to print the sum before and after
            the adjustment.

    Returns:
        Adjusted copy of probs. A list input gives a list back, a numpy array
        input gives a numpy array back (integer/bool arrays are returned as
        float arrays so the correction is not truncated).

    '''
    is_array = isinstance(probs, np.ndarray)
    if is_array and probs.dtype.kind in 'biu':
        # an integer/bool array would silently truncate the fractional
        # correction on assignment, so adjust a float copy instead
        adjusted_probs = probs.astype(float)
    else:
        adjusted_probs = copy.deepcopy(probs)

    total = np.sum(adjusted_probs)
    if print_data:
        print('Initial sum: {}'.format(total))

    difference = total - target_sum
    num_entries = len(adjusted_probs)
    # nothing to spread the difference over when there are no entries
    if difference != 0 and num_entries > 0:
        # need slight adjustment so probs sum to target
        diff_per_pair = difference / num_entries
        if is_array:
            adjusted_probs -= diff_per_pair
        else:
            for idx in range(num_entries):
                adjusted_probs[idx] -= diff_per_pair

    if print_data:
        print('Final sum: {}'.format(np.sum(adjusted_probs)))

    return adjusted_probs
def adjust_probability_dict_sum(probs, target_sum=1, print_data=False):
    '''Shifts every value of a dict by the same amount so they sum to target_sum.

    The difference between the current sum of the values and target_sum is
    split equally across the entries and subtracted from each value. A deep
    copy is adjusted and returned; the input dict is left as it was.

    Args:
        probs (dict): Dict whose values are the probabilities to adjust.
        target_sum (int or float): Value the returned values should sum to.
        print_data (bool): Whether or not to print the sum before and after
            the adjustment.

    Returns:
        dict: Copy of probs with adjusted values.

    '''
    result = copy.deepcopy(probs)

    starting_total = np.sum(list(result.values()))
    if print_data:
        print('Initial sum: {}'.format(starting_total))

    surplus = starting_total - target_sum
    if surplus != 0:
        # spread the surplus (or shortfall) evenly over all entries
        per_entry_shift = surplus / len(list(result.keys()))
        for name, value in list(result.items()):
            result[name] = value - per_entry_shift

    if print_data:
        print('Final sum: {}'.format(np.sum(list(result.values()))))

    return result
def get_pair_prob_dict_of_node_dist_matrix(node_dist, eps, all_combinations=False, bidirectional=False):
    '''Gets prob dict of each pair being chosen given node dist of probabilities.

    If all_combinations, will record pair probabilities for all possible pair combinations
    i.e. src-dst and dst-src. If False, assumes src-dst==dst-src.

    If bidirectional, will multiply probabilities by 2 as pair can be src-dst or dst-src.
    If bidirectional=True -> values sum to 1, if bidirectional=False -> values sum to 0.5.

    Args:
        node_dist (numpy array): 2D matrix array of source-destination pair
            probabilities, indexed as node_dist[src_idx, dst_idx].
        eps (list): List of network node endpoints.
        all_combinations (bool): If False, only pairs whose src index is not
            greater than their dst index are recorded. If True, both
            orientations of every pair are recorded.
        bidirectional (bool): If True, each recorded value is
            node_dist[src_idx, dst_idx] + node_dist[dst_idx, src_idx].

    Returns:
        dict: Maps JSON-encoded [src, dst] strings to probabilities.

    '''
    _, pair_to_index = get_network_pair_mapper(eps)
    _, _, node_to_index, _ = tools.get_network_params(eps, all_combinations=all_combinations)

    # seed with the mapper's pairs so every one of them has an entry
    pair_prob_dict = {pair: 0 for pair in pair_to_index}

    for src in eps:
        src_idx = node_to_index[src]
        for dst in eps:
            dst_idx = node_to_index[dst]
            if src_idx == dst_idx:
                continue
            if not all_combinations and src_idx > dst_idx:
                # making symmetric so skip this side of diagonal
                continue

            prob = node_dist[src_idx, dst_idx]
            if bidirectional:
                prob = prob + node_dist[dst_idx, src_idx]
            # plain dict assignment cannot raise KeyError, so no reversed-pair
            # fallback is needed here
            pair_prob_dict[json.dumps([src, dst])] = prob

    return pair_prob_dict