# Source code for private_swedish_mind.utils

"""
Utilities
=========

Auxiliary routines for the main module
"""

import  pandas as pd
from shapely.ops import unary_union
import  numpy as np



def make_hist_mpn_geoms(mpn_geoms, cell_rings):
    """
    Build the raw data of a histogram describing how `mpn_geoms` populate
    the cell rings `cell_rings`.

    Every ring is visited in order; for each element of `mpn_geoms` that is
    contained in the ring (``geom in ring``) the position of that ring is
    recorded once. An element that occurs several times in `mpn_geoms`, or
    that belongs to several rings, is therefore counted several times.

    :param mpn_geoms: items to look up. It is traversed once per ring, so it
        has to be re-iterable (e.g. a list, not a generator).
    :param cell_rings: sequence of containers supporting the ``in`` operator.
    :return: list of ring positions, grouped by ring in ascending order.
    """
    return [
        ring_idx
        for ring_idx, ring in enumerate(cell_rings)
        for geom in mpn_geoms
        if geom in ring
    ]
def get_rings_around_cell(cell_idx, vcs, width):
    """
    Collect the indexes of the Voronoi cells surrounding a cell, layer by
    layer.

    Layer zero is the cell itself. Each following layer consists of the rows
    of `vcs` whose geometry touches the union of all layers collected so far.

    :param cell_idx: position of the central cell in `vcs` (used with
        ``iloc``).
    :param vcs: table of Voronoi cells exposing ``geometry`` with a
        ``touches`` method -- presumably a GeoDataFrame; confirm with caller.
    :param width: number of layers to build around the central cell.
    :return: list of ``width + 1`` lists; the first one is ``[cell_idx]``,
        the following ones hold the index labels of each successive layer.

    NOTE(review): layer zero is reported as the *position* `cell_idx`, while
    the other layers are reported as index *labels* of `vcs`. The two only
    agree when `vcs` has a default ``0..n-1`` index -- confirm.
    """
    layers = [vcs.iloc[cell_idx:cell_idx + 1]]
    layer_indexes = [[cell_idx]]

    # Geometry covered so far; starts as the central cell only.
    covered = layers[0].iloc[0].geometry

    for _ in range(width):
        ring = vcs[vcs.geometry.touches(covered)]
        layer_indexes.append(ring.index.to_list())
        layers.append(ring)

        # Merge everything collected so far, so that the next ring is made of
        # the cells touching the outer boundary of the merged area.
        covered = unary_union(pd.concat(layers).geometry)

    return layer_indexes
def get_vcs_used_area(vcs, data, area_max):
    """
    Collect the Voronoi cells visited during all journeys, together with the
    areas of those cells that are below the given threshold.

    :param vcs: table of Voronoi cells exposing ``geometry.area`` --
        presumably a GeoDataFrame; confirm with caller.
    :param data: table with a ``vc_index_mpn`` column holding, per row, the
        list of visited cell positions. All remaining values of `data` are
        flattened together, so it is assumed to carry only that column --
        TODO confirm.
    :param area_max: exclusive upper bound for the returned areas, expressed
        in the same unit as ``geometry.area / 10**6`` (presumably km^2 for a
        metric CRS -- confirm).
    :return: tuple ``(vc_used, area)``. ``vc_used`` holds every visited cell
        (it is NOT filtered by `area_max`); ``area`` holds only the areas
        smaller than `area_max`.
    """
    visited = data.explode('vc_index_mpn').dropna()

    # Exploding a column of lists yields object dtype, which `.iloc` rejects
    # as a non-numeric indexer; cast the unique positions to integers.
    unique_vc_indexes_tracks = np.unique(visited.values.flatten()).astype(int)

    vc_used = vcs.iloc[unique_vc_indexes_tracks]
    area = vc_used.geometry.area / 10**6

    return vc_used, area[area < area_max]
# NOTE: kept for reference only; `get_splits(..., make_int=False)` appears to cover the same logic.
# def vc_area_splits(area, n_parts):
#     """
#     given the Pandas Series with Voronoi cell areas, sorts the areas in ascending order
#     and splits them into the given number of parts.
#
#     Returns a list of tuples with the split borders, like `[(0.01, 2.63), (2.71, 24.67), (25.16, 184.09)]`
#     """
#
#     splits = np.array_split(area.sort_values().round(2), n_parts)
#     size_borders = [(el.iloc[0], el.iloc[-1]) for el in splits]
#
#     return size_borders
def get_splits(load, n_parts, make_int=True):
    """
    Sort the values of a Pandas Series in ascending order, round them to two
    decimals and split them into `n_parts` consecutive, nearly equal parts.

    :param load: Pandas Series of numeric values (e.g. Voronoi cell areas).
    :param n_parts: number of parts to split the sorted values into.
    :param make_int: when true, the borders are truncated with ``int``.
    :return: list of ``(smallest, largest)`` tuples, one per non-empty part,
        like ``[(0.01, 2.63), (2.71, 24.67), (25.16, 184.09)]``. When
        `n_parts` exceeds the number of values the surplus parts are empty
        and are left out, so the list can be shorter than `n_parts`.
    """
    # Split the plain ndarray rather than the Series: `np.array_split` on a
    # Series relies on the deprecated `Series.swapaxes`.
    ordered = load.sort_values().round(2).to_numpy()
    chunks = [chunk for chunk in np.array_split(ordered, n_parts) if len(chunk)]

    if make_int:
        return [(int(chunk[0]), int(chunk[-1])) for chunk in chunks]
    return [(chunk[0], chunk[-1]) for chunk in chunks]
def make_group_load_col(row, vcs, size_):
    """
    Flag, for every cell position in `row`, whether the cell's
    ``num_ids_list`` value lies strictly between the borders in `size_`.

    :param row: iterable of cell positions in `vcs` (used with ``iloc``).
    :param vcs: table of cells with a ``num_ids_list`` column.
    :param size_: pair ``(lower, upper)`` of borders.
    :return: list of booleans, one per position in `row`.

    NOTE(review): both comparisons are strict, so a value equal to a border
    belongs to no group -- confirm this is intended if the borders are taken
    from the actual smallest/largest values of a split.
    """
    lower, upper = size_[0], size_[1]
    loads = vcs.iloc[list(row)].num_ids_list
    return [(el < upper) & (el > lower) for el in loads]


def make_group_col(row, vcs, size_):
    """
    Flag, for every cell position in `row`, whether the cell's area
    (``geometry.area / 10**6``) lies strictly between the borders in `size_`.

    :param row: iterable of cell positions in `vcs` (used with ``iloc``).
    :param vcs: table of cells exposing ``geometry.area`` -- presumably a
        GeoDataFrame; confirm with caller.
    :param size_: pair ``(lower, upper)`` of borders, in the unit of
        ``geometry.area / 10**6``.
    :return: list of booleans, one per position in `row`.

    NOTE(review): both comparisons are strict, so an area equal to a border
    belongs to no group -- confirm this is intended.
    """
    lower, upper = size_[0], size_[1]
    areas = vcs.iloc[list(row)].geometry.area / 10**6
    return [(el < upper) & (el > lower) for el in areas]
def make_diffs_ring_histogram_sample_size(hist_data, series_length):
    """
    Measure how the histogram of a random sample approaches the histogram of
    the full data as the sample grows.

    For each of `series_length` repetitions, samples of increasing size are
    drawn (without replacement) from `hist_data`, the lists they contain are
    flattened, and a density histogram with bin edges ``0, 1, ..., 6`` is
    computed. Each histogram is compared with the one of the full-size sample
    by the sum of absolute bin differences.

    :param hist_data: Pandas Series whose elements are lists of numbers
        (the ``hist`` column).
    :param series_length: number of repetitions, i.e. rows of the result.
    :return: Pandas DataFrame with one row per repetition and one column per
        sample size. The sizes are those of ``10, 50, 100, 200, 300, 500``
        that are smaller than ``len(hist_data)``, followed by
        ``len(hist_data)`` itself (whose column is always zero).

    Sampling is random, so results differ between calls unless the caller
    seeds NumPy's global random state.
    """
    n_rows = len(hist_data)

    # A sample without replacement cannot exceed the data, so keep only the
    # fixed sizes that fit; the full size is always the reference column.
    sample_size = [size for size in (10, 50, 100, 200, 300, 500) if size < n_rows]
    sample_size.append(n_rows)

    bin_edges = range(7)  # edges 0..6, i.e. six unit-wide bins

    series_diffs = []
    for _ in range(series_length):
        hist_vals = []
        for size in sample_size:
            sampled = hist_data.sample(size)
            flat = [el for sublist in sampled.to_list() for el in sublist]
            density, _edges = np.histogram(flat, bins=bin_edges, density=True)
            hist_vals.append(density)

        reference = hist_vals[-1]
        series_diffs.append([sum(abs(vals - reference)) for vals in hist_vals])

    return pd.DataFrame(series_diffs, columns=sample_size)