# Source code for private_swedish_mind.utils

"""
Utilities
=========

Auxiliary routines for the main module
"""

import  pandas as pd
from shapely.ops import unary_union
import  numpy as np



def make_hist_mpn_geoms(mpn_geoms, cell_rings):
    """
    Build the raw data for a histogram of ring hits.

    Every ring in `cell_rings` is visited in order (the ring number is its position
    in the list). For each element of `mpn_geoms` that the ring contains
    (``element in ring``) the ring number is recorded once.

    :param mpn_geoms: items to look up; traversed once per ring, so it has to be
        re-iterable (e.g. a list)
    :param cell_rings: iterable of containers, one per ring
    :return: list of ring numbers grouped by ring in ascending order; a ring number
        is repeated once for every element of `mpn_geoms` found in that ring
    """

    return [
        ring_no
        for ring_no, ring in enumerate(cell_rings)
        for item in mpn_geoms
        if item in ring
    ]


def get_rings_around_cell(cell_idx, vcs, width):
    """
    Collect the cells surrounding a given Voronoi cell, layer by layer.

    Layer 0 is the cell itself. Each further layer consists of the rows of `vcs`
    whose geometry touches the union of all layers collected so far.

    :param cell_idx: position (as used by ``vcs.iloc``) of the central cell
    :param vcs: table of Voronoi cells with a ``geometry`` column
        (presumably a GeoDataFrame -- verify against the caller)
    :param width: number of layers to collect around the central cell
    :return: list of ``width + 1`` lists. The first one is ``[cell_idx]``, the
        following ones hold the index labels of `vcs` for layer 1, 2, ...

    Note that layer 0 is reported as the position `cell_idx` while the other layers
    are reported as index labels; the two only agree when the index labels of `vcs`
    coincide with row positions.
    """

    rings = [vcs.iloc[cell_idx:cell_idx + 1]]
    ring_indexes = [[cell_idx]]
    # geometry covered by all layers found so far; starts as the central cell
    covered = rings[0].iloc[0].geometry
    for _ in range(width):
        ring = vcs[vcs.geometry.touches(covered)]
        ring_indexes.append(ring.index.to_list())
        rings.append(ring)
        # grow the covered area so the next pass finds the next layer outwards
        covered = unary_union(pd.concat(rings).geometry)

    return ring_indexes


def get_vcs_used_area(vcs, data, area_max):
    """
    Collect the Voronoi cells visited during all journeys together with their areas.

    :param vcs: table of Voronoi cells with a ``geometry`` column
        (presumably a GeoDataFrame -- verify against the caller)
    :param data: journeys; its ``vc_index_mpn`` entries are exploded and every
        remaining value is used as a row position in `vcs`.
        NOTE(review): all columns of `data` are flattened together, so this looks
        like it expects `data` to hold only the ``vc_index_mpn`` column -- confirm.
    :param area_max: upper limit (exclusive) for the areas that are returned, in the
        units of ``geometry.area / 10**6`` (presumably km^2 for a metric CRS)
    :return: tuple ``(vc_used, area)`` -- the visited cells (not filtered by area)
        and the areas of those visited cells that are below `area_max`
    """
    visited = data.explode('vc_index_mpn').dropna().values.flatten()
    # explode() produces object-dtype values, which .iloc rejects as non-numeric
    # indexers; cast to plain integers before the positional lookup.
    unique_vc_indexes_tracks = np.unique(visited).astype(int)
    vc_used = vcs.iloc[unique_vc_indexes_tracks]

    area = vc_used.geometry.area / 10**6
    area = area[area < area_max]

    return vc_used, area


# def vc_area_splits(area, n_parts):
#     """
#     given the Pandas Series with Voronoi cell areas, sorts the areas in ascending order
#     and splits them into the given number of parts.
#
#     Returns a list of tuples with splits borders, like `[(0.01, 2.63), (2.71, 24.67), (25.16, 184.09)]`
#     """
#
#     splits = np.array_split(area.sort_values().round(2), n_parts)
#     size_borders = [(el.iloc[0], el.iloc[-1]) for el in splits]
#
#     return size_borders


def get_splits(load, n_parts, make_int=True):
    """
    Split the sorted values of `load` into consecutive parts and report their borders.

    The values are sorted in ascending order, rounded to two decimals and divided
    into `n_parts` consecutive chunks of (almost) equal length, using the chunk
    sizes of ``numpy.array_split``.

    :param load: Pandas Series with numeric values (e.g. cell loads or cell areas)
    :param n_parts: number of parts to split the sorted values into
    :param make_int: if true, the borders are truncated to ``int``
    :return: list of ``(first, last)`` tuples, one per non-empty part, like
        ``[(0.01, 2.63), (2.71, 24.67), (25.16, 184.09)]``. Parts that would be
        empty (more parts requested than values available) are left out.
    """

    ordered = load.sort_values().round(2)
    # Split row positions instead of the Series itself: the chunk sizes are the same,
    # but NumPy never has to slice a pandas object.
    chunks = np.array_split(np.arange(len(ordered)), n_parts)

    size_borders = []
    for chunk in chunks:
        if chunk.size == 0:
            # nothing fell into this part, so it has no borders to report
            continue
        low = ordered.iloc[chunk[0]]
        high = ordered.iloc[chunk[-1]]
        if make_int:
            low, high = int(low), int(high)
        size_borders.append((low, high))

    return size_borders


def make_group_load_col(row, vcs, size_):
    """
    Flag which of the cells referenced by `row` have a load inside `size_`.

    :param row: iterable of row positions in `vcs`
    :param vcs: table of cells with a ``num_ids_list`` column holding the load
    :param size_: pair ``(lower, upper)``; both borders are exclusive
    :return: list with one flag per entry of `row`, true when
        ``lower < load < upper``
    """
    positions = list(row)
    loads = vcs.iloc[positions].num_ids_list

    flags = []
    for load in loads:
        flags.append((load < size_[1]) & (load > size_[0]))
    return flags



def make_group_col(row, vcs, size_):
    """
    Flag which of the cells referenced by `row` have an area inside `size_`.

    :param row: iterable of row positions in `vcs`
    :param vcs: table of cells with a ``geometry`` column; the area is taken as
        ``geometry.area / 10**6``
    :param size_: pair ``(lower, upper)``; both borders are exclusive
    :return: list with one flag per entry of `row`, true when
        ``lower < area < upper``
    """
    positions = list(row)
    selected = vcs.iloc[positions]
    scaled_areas = selected.geometry.area / 10**6

    flags = []
    for cell_area in scaled_areas:
        flags.append((cell_area < size_[1]) & (cell_area > size_[0]))
    return flags



def make_diffs_ring_histogram_sample_size(hist_data, series_length):
    """
    Measure how the ring histogram of a sample converges to that of the full data.

    Samples of growing size are drawn from `hist_data`, a normalised histogram is
    made for each of them and compared with the histogram of the complete
    `hist_data`. The comparison is the sum of absolute differences over the bins.
    The whole experiment is repeated `series_length` times.

    :param hist_data: Pandas Series whose entries are lists of ring numbers
        (the ``hist`` column)
    :param series_length: number of repetitions, i.e. rows in the result
    :return: Pandas DataFrame with one row per repetition and one column per sample
        size. The sample sizes are those of 10, 50, 100, 200, 300, 500 that do not
        exceed ``len(hist_data)``, followed by ``len(hist_data)`` itself; the last
        column therefore compares the full data with itself.
    """

    total = len(hist_data)
    # Samples are drawn without replacement, so a preset larger than the data would
    # make ``sample`` raise. Keep the presets that fit and always end with the full set.
    sample_size = [size for size in (10, 50, 100, 200, 300, 500) if size <= total]
    sample_size.append(total)
    bin_edges = range(7)  # integer edges 0..6, i.e. six unit-wide bins

    series_diffs = []
    for _ in range(series_length):
        hist_vals = []
        for size in sample_size:
            drawn = hist_data.sample(size)
            flat = [el for sublist in drawn.to_list() for el in sublist]
            density, _edges = np.histogram(flat, bins=bin_edges, density=True)
            hist_vals.append(density)

        # the last histogram is built from all the data and serves as the reference
        reference = hist_vals[-1]
        series_diffs.append([sum(abs(vals - reference)) for vals in hist_vals])

    return pd.DataFrame(series_diffs, columns=sample_size)