"""Source code for breakwater.database.database."""

import os
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

from ..utils.exceptions import user_warning

# Basemap is an optional dependency: it is only needed for the map() method
# of BreakwaterDatabase, so a missing install is reported as a warning
# instead of failing the whole import.
try:
    from mpl_toolkits.basemap import Basemap
except ImportError:
    user_warning('The Basemap package is required to use BreakwaterDatabase')

class BreakwaterDatabase:
    """ Breakwater Database

    Import the breakwater database developed by (Allsop et al., 2009),
    consisting of completed breakwater projects with data ranging from
    design wave height to contractor. The constructed breakwaters are
    classified by breakwater type, the following types are currently
    included: Rubble Mound, Composite, Berm, Caisson and Revetments.

    The breakwater database is a separate module of :py:obj:`breakwater`
    and can be imported with the following command:

    .. code-block:: python

        from breakwater.database import BreakwaterDatabase

    .. note::
       To use this module the :py:obj:`Basemap` package is required, this
       dependency is additional to the dependencies mentioned in Section
       `2.1 <install.html#Dependencies>`__. See the following `link
       <https://matplotlib.org/basemap/users/installing.html>`__ for an
       installation of :py:obj:`Basemap`

    Parameters
    ----------
    update : bool, optional, default: False
        if False the data is not updated and the included data is loaded,
        if True the database is loaded from the :py:attr:`source`

    Attributes
    ----------
    df : pd.DataFrame
        DataFrame of the breakwater database
    source : str
        url of the source
    """

    def __init__(self, update=False):
        """ See help(BreakwaterDatabase) for more info """
        # set source of the database as attribute
        self.source = ('http://kennisbank-waterbouw.tudelft.nl/breakwaters/'
                       'printall.php')

        # check if data must be updated from the source
        if update:
            # update data from the source
            dataframes = pd.read_html(self.source)

            # get the correct df from the dfs by checking the number of
            # columns; the database table is the only wide one on the page
            for dataframe in dataframes:
                if dataframe.shape[1] > 10:
                    df = dataframe
                    break

            # check if headers are correct; if not, promote the first row
            if df.columns.dtype != object:
                header = df.iloc[0]
                df = df[1:]
                df.columns = header

            # set id column as index
            df.set_index('id', inplace=True)
            df.index = df.index.astype(np.int64)

            # fix column names of front and rear: the three unnamed columns
            # following 'front'/'rear' hold armour, unit, size and slope data
            names = ['armour', '(unit)', 'size', 'slope']
            new_columns = []
            for i, column in enumerate(df.columns):
                # bug fix: the original condition was
                # `column == 'front' or column == 'rear' and ...`, which due
                # to operator precedence skipped the isnull check for 'front'
                if (column in ('front', 'rear')
                        and pd.isnull(df.columns[i+1:i+4]).any()):
                    # expand into the four sub-columns
                    for name in names:
                        new_columns.append(f'{column} {name}')
                elif pd.isnull(column):
                    # unnamed column absorbed by a front/rear expansion
                    pass
                else:
                    new_columns.append(column)
            df.columns = new_columns

            # convert columns to numeric where possible
            # NOTE(review): errors='ignore' is deprecated in pandas >= 2.2
            df = df.apply(pd.to_numeric, errors='ignore')

            # normalise the breakwater type spelling
            types_fmt = {
                'Rubble mound': 'Rubble Mound',
                'rubble mound': 'Rubble Mound',
                'Composite breakwater': 'Composite',
                'composite': 'Composite',
                'berm breakwater': 'Berm',
                'Berm breakwater': 'Berm',
                'Berm Breakwater': 'Berm',
                'Caisson breakwater': 'Caisson',
                'caisson breakwater': 'Caisson',
                'caisson': 'Caisson'}
            df.type = df.type.replace(types_fmt)

            # fix rows classified by armour unit instead of breakwater type
            units = [
                'Tetrapod', 'Antifer', 'Cubes', 'Accropode', 'Accropode II',
                'Core-Loc', 'Xbloc', 'XblocPlus', 'Dolos', 'COB', 'Stabit',
                'Cubipod']
            for unit in units:
                df.type = df.type.replace(unit, 'Rubble Mound')
        else:
            # load the bundled csv file next to this module
            # bug fix: use os.path.join instead of a hard-coded backslash
            # separator, which broke loading on non-Windows systems
            directory = os.path.dirname(os.path.abspath(__file__))
            df = pd.read_csv(
                os.path.join(directory, 'database.csv'), index_col=0)

        # set df as attribute
        self.df = df

        # set markers and colors per breakwater type as private attributes
        self._markers = {
            'Rubble Mound': 'o',
            'Caisson': 's',
            'Berm': 'D',
            'Revetment': '>',
            'Composite': '*',
            'Unclassified': 'p'}
        self._colors = {
            'Rubble Mound': '#1f77b4',
            'Caisson': '#ff7f0e',
            'Berm': '#2ca02c',
            'Revetment': '#d62728',
            'Composite': '#9467bd',
            'Unclassified': '#bcbd22'}

    @staticmethod
    def _del_zeros(df):
        """ Delete rows containing a zero """
        # make list to store ids to delete
        to_delete = []

        # iterate over the df
        for id, row in df.iterrows():
            # iterate over the row
            # bug fix: Series.iteritems() was removed in pandas 2.0,
            # use items() instead
            for param, value in row.items():
                # check if value is zero
                if value == 0:
                    # row must be deleted
                    to_delete.append(id)
                    break

        # delete rows and return df
        return df.drop(labels=to_delete, axis=0)

    @staticmethod
    def _show_unclassified(show, df):
        """ Helper method to filter the df for unclassified bw types """
        # check if unclassified must be plotted
        if show:
            # replace nan with unclassified so label is correct in plot
            df.type = df.type.replace(np.nan, 'Unclassified')
        else:
            # remove unclassified breakwaters
            df.dropna(subset=['type'], inplace=True)

        return df

    @staticmethod
    def _validate_excludes(excludes):
        """ Verify the input for the correct type """
        # excludes must be a list or None
        if excludes is not None and not isinstance(excludes, list):
            # raise TypeError for incorrect type of exclude
            raise TypeError(
                ('Types to exclude must be given in a list, not as '
                 f'{type(excludes).__name__}'))

    @staticmethod
    def _validate_all_excludes(remaining):
        """ Validate if specified types have not been excluded """
        if remaining:
            # not all given types were in the database,
            # and were thus not excluded, therefore show warning
            not_excluded = ', '.join(remaining)
            user_warning(
                ('The following types in exclude are not in the database: '
                 f'{not_excluded}'))

    @property
    def unclassified(self):
        """ Get the number of unclassified breakwaters """
        # unclassified rows have NaN in the type column
        return self.df.type.isna().sum()
[docs] def report(self, save=True, save_path='data_report.xlsx', decimals=3): """ Make a report of the data in the database Method to generate a data report of the database. For each breakwater type the total and missing number of datapoints is determined. Furthermore, for numerical values the mean and standard deviation is also computed. .. note: Note that the mean and standard deviation is not computed for the following parameters: start, finish, front size, rear size, latitude and longitude Parameters ---------- save : bool, optional, default: True If True an Excel version of the data report is generated, use :py:obj:`save_path` to specify a save path save_path : str File path to save the Excel file to decimals : int, optional, default: 3 Number of decimal places to round to (default: 0). If decimals is negative, it specifies the number of positions to the left of the decimal point. Returns ------- pd.DataFrame if :py:obj:`save` is False a DataFrame of the report is returned """ # create df to store data report in data_report = pd.DataFrame() # drop unclassified breakwaters df = self._show_unclassified(False, self.df) # define columns of which the mean and standard deviation does have # to be computed no_computations_required = [ 'start', 'finish', 'front size', 'rear size', 'Lat', 'Lon'] # make list for headers and subheaders headers, subheaders = ['Type'], [''] # set bool to track if subheaders have been added for wall column add_subheaders_wall = True # iterate over the bw types for i, bw_type in enumerate(df.type.unique()): # get only the bws of the current type bws = df[df.type == bw_type] # create dict with bw_type to store data type_data = {'type': [bw_type]} # iterate over the columns with dtype (for computing mean, etc) for column, dtype in bws.dtypes.iteritems(): # add name of the column as a header if first iteration if i == 0 and column != 'type': headers.append(column) # check if column has the bw_type if column == 'type': # pass as this has already 
been added continue # check type of the column elif dtype == np.object: # column contains strings or is a coordinate # count total number of datapoints total = len(bws[column].values) # check if column contains the slope if 'slope' in column: # check bw type if bw_type == 'Caisson' or bw_type == 'Composite': # have slope 1:0 but this is not needed since # these are vertical structures type_data[column] = ['-'] # add subheader if first iteration if i == 0: subheaders.append('') continue else: # slope 1:0 is specified when the slope unknown # so replace these values with nan column_data = bws[column].replace('1:0', np.nan) missing = column_data.isna().sum() else: # count missing datapoints missing = bws[column].isna().sum() # add to column type_data[column] = [f'{total-missing}/{total}'] # add subheader if first iteration if i == 0: subheaders.append('') elif column in no_computations_required: # replace zeros for nan column_data = bws[column].replace(0, np.nan) # compute total and missing datapoints total = len(bws[column].values) missing = column_data.isna().sum() # add to column type_data[column] = [f'{total-missing}/{total}'] # add subheader if first iteration if i == 0: subheaders.append('') else: # column is an int or float # check if bw type is not caisson and column is wall if column == 'wall' and bw_type != 'Caisson': # wall is not a parameter of other structures type_data[f'{column} no'] = ['-'] type_data[f'{column} comp'] = ['-'] # add subheader if first iteration if add_subheaders_wall: headers.append(column) subheaders.append('') subheaders.append('') add_subheaders_wall = False continue # replace zeros for nan column_data = bws[column].replace(0, np.nan) # compute total and missing datapoints total = len(bws[column].values) missing = column_data.isna().sum() # compute average and standard deviation mean = column_data.mean() std = column_data.std() # add data to dict type_data[f'{column} no'] = [f'{total-missing}/{total}'] type_data[f'{column} comp'] = 
[f'{mean}±{std}'] # append extra column to headers and set subheaders if i == 0: headers.append(column) subheaders.append('datapoints') subheaders.append('μ ± σ') # check if column is wall for setting the bool if column == 'wall': # set to False because has been added here add_subheaders_wall = False # add dict to df temp_df = pd.DataFrame(data=type_data) data_report = data_report.append( temp_df, ignore_index=True, sort=False) # update columns of the df with double row of headers data_report.columns = [headers, subheaders] # check if df must be saved if save: # save df data_report.to_excel(save_path) else: # return df return data_report
[docs] def correlation(self, param1, param2, bw_type=None, method='pearson'): """ Compute the correlation between two parameters Parameters ---------- param1 : str name of parameter 1 param2 : str name of parameter 2 bw_type : str, optional, default: None if specified only the values of the given bw_type are considered method : {pearson, spearman}, optional, default: pearson method of correlation Returns ------- tuple correlation between the two parameters and the p-value """ # check if bw_type is given if bw_type is not None: # only for bw_type pass else: # for all bw types # get the two columns and remove zeros filtered_df = self._del_zeros(self.df[[param1, param2]]) # method of correlation if method == 'pearson': # pearson method corr = stats.pearsonr( filtered_df[param1].values, filtered_df[param2].values) elif method == 'spearman': # spearman method corr = stats.spearmanr( filtered_df[param1].values, filtered_df[param2].values) else: raise NotImplementedError( f'{method} is not supported, must be pearson or spearman') return corr
    def cross_section(self, id, B=None, Rc=None, h=None, slope=None):
        """ Plot a cross section of the breakwater

        Method to plot a cross section of a breakwater in the database.
        The breakwater is selected by the id of the breakwater. In case
        data is missing to plot the breakwater it is possible to specify
        these as arguments.

        .. warning::
           plot function currently only supports Rubble Mound and
           caisson breakwaters

        Parameters
        ----------
        id : int
            id of the breakwater to plot
        B : float, optional, default: None
            specify custom crest width
        Rc : float, optional, default: None
            specify custom crest height
        h : float, optional, default: None
            specify a custom water level
        slope : tuple, optional, default: None
            specify a custom slope, must be specified as a tuple (V, H)

        Raises
        ------
        ValueError
            If data required to plot a cross section is missing
        """
        # set custom slope if not specified for protecting _validate
        # ((0, 0) means "not given" to _validate_plot_vals)
        if slope is None:
            slope = (0,0)

        # get the data of the bw as a one-row frame
        bw = self.df[self.df.index == id]
        type = bw.type.values[0]

        # get hydraulic data, falling back to the h argument when the
        # database value is zero
        depth = _validate_plot_vals('depth', bw['depth'].values[0], h)

        # get geometric data
        # NOTE(review): 'heigth' is presumably the crest-height column as
        # (mis)spelled in the database itself — verify against the csv
        freeboard = _validate_plot_vals('Rc', bw['heigth'].values[0], Rc)
        width = _validate_plot_vals('B', bw['width'].values[0], B)

        # set infobox with general info shown next to the plot
        cost = bw['cost(M$)'].values[0]
        infobox = '\n'.join((
            f'constructed between {bw.start.values[0]} and {bw.finish.values[0]}',
            f'cost = {cost} M$',
            f'owner = {bw.owner.values[0]}',
            f'contractor = {bw.contractor.values[0]}',
            f'consultant = {bw.consultant.values[0]}',
            f'Hs = {bw.Hs.values[0]} m',
            f'Tz = {bw.Tz.values[0]} s',
            f'Tp = {bw.Tp.values[0]} s',
            f'Rc = {freeboard} m',
            f'h = {depth} m',))

        # check type
        if type == 'Rubble Mound':
            # get the slope of the bw, stored in the db as 'V:H'
            slope_databasse = bw['front slope'].values[0].split(':')
            V = _validate_plot_vals(
                'slope', float(slope_databasse[0]), slope[0])
            H = _validate_plot_vals(
                'slope', float(slope_databasse[1]), slope[1])

            # compute coordinates of the trapezoidal outline:
            # x1 is the horizontal run of the slope, xwlev where the
            # waterline meets the slope
            x1 = H*(depth+freeboard)/V
            xwlev = H*freeboard/V
            x = [-x1-0.5*width, -0.5*width, 0.5*width, x1+0.5*width]
            y = [0, depth+freeboard, depth+freeboard, 0]

            # set additional width for xmin and xmax
            increase_x = 0

            # add extra info of rubble mound
            armour = bw['front armour'].values[0]
            size = bw['front size'].values[0]
            unit = bw['front (unit)'].values[0]
            infobox = '\n'.join((
                infobox,
                f'armour = {armour} of {size} {unit}',
                f'slope = {V}:{H}'))
        elif type == 'Caisson':
            # compute coordinates of the rectangular outline
            xwlev = 0
            x = [-0.5*width, -0.5*width, 0.5*width, 0.5*width]
            y = [0, depth+freeboard, depth+freeboard, 0]

            # set additional width for xmin and xmax
            increase_x = 2*width
        else:
            raise NotImplementedError(
                f'{type} is currently not supported for plotting')

        # create figure
        fig, ax = plt.subplots(figsize=(10,5))

        # plot bw outline
        ax.plot(x,y, color='k', lw=1)

        # get the xmin and xmax, widened by 20% plus the extra margin
        xmin = ax.get_xlim()[0]*1.2 - increase_x
        xmax = ax.get_xlim()[1]*1.2 + increase_x

        # plot bottom and wlev (left and right of the structure)
        ax.axhline(y=0, color='peru', lw=1, zorder=5)
        ax.hlines(
            y=depth, xmin=xmin, xmax=-xwlev-0.5*width, color='dodgerblue',
            lw=1)
        ax.hlines(
            y=depth, xmin=xwlev+0.5*width, xmax=xmax, color='dodgerblue',
            lw=1)

        # place the info box with all info outside the axes
        props = dict(boxstyle='round', facecolor='whitesmoke', alpha=0.5)
        ax.text(
            1.02, 0.99, infobox, transform=ax.transAxes, fontsize=9,
            verticalalignment='top', bbox=props)

        # format the figure
        ax.set_xlim(xmin, xmax)
        plt.title(
            (f'Cross section of the {type.lower()} breakwater at '
             f'{bw.harbour.values[0]}, {bw.country.values[0]}'))
        ax.set_aspect('equal', adjustable='box')
        ax.grid()
        fig.tight_layout()
        plt.show()
[docs] def map( self, area=[], resolution='c', show_unclassified=False, exclude=None): """ Plot the breakwaters on a world map Method to plot all breakwaters with coordinates on a map of the world, or part of the world if an area is specified. Method uses :py:obj:`Basemap` to generate the map. Parameters ---------- area : list, optional, default: [] specify the coordinates of the area to plot. Use following format [llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat] resolution : str, optional, default: c resolution of the map to use. Can be c (crude), l (low), i (intermediate), h (high), f (full). show_unclassified : bool, optional, default: False True is unclassified breakwaters with a coordinate must be plotted, False is unclassified breakwaters must not be plotted. exclude : list, optional, default: None list of breakwater types to exclude from the plot """ # validate exclude input for type self._validate_excludes(exclude) # create the figure plt.figure(figsize=(16,12)) # check if an area is specified if not area: # plot full map m = Basemap( lat_0=0, lon_0=0, projection='robin', resolution=resolution) size = 7 else: # plot the specified area m = Basemap( llcrnrlon=area[0], llcrnrlat=area[1], urcrnrlon=area[2], urcrnrlat=area[3], resolution=resolution) size = 15 # edit lay-out of the map m.drawcountries(color='#d7d7d7') m.drawmapboundary(fill_color='#D0CFD4', linewidth=0) m.fillcontinents(color='#EFEFEF', lake_color='#D0CFD4') # remove bw's without a Lon and\or Lat filtered_df = self._del_zeros(self.df[['type', 'Lon', 'Lat']]) # filter df for showing unclassified bw types filtered_df = self._show_unclassified(show_unclassified, filtered_df) # iterate over the bw types to plot them for i, bw_type in enumerate(filtered_df.type.unique()): # check if type is in exclude if exclude is not None and bw_type in exclude: # pass and delete type from exclude exclude.remove(bw_type) else: # get only the bws of the current type bws = filtered_df[filtered_df.type == bw_type] # plot 
the bws xpt, ypt = m(bws.Lon.values, bws.Lat.values) m.scatter( xpt, ypt, s=size, alpha=1, label=bw_type, c=self._colors.get(bw_type, '#17becf'), marker=self._markers.get(bw_type, 'H'), zorder=2) # check if all types in exclude are in the database self._validate_all_excludes(exclude) # add legend, set tight_layout and show plot plt.legend(loc=1) plt.tight_layout() plt.show()
[docs] def scatter( self, param1, param2, show_unclassified=False, exclude=None, min_data=5, xmax=None, ymax=None, bins_param1=10, bins_param2=10): """ Make a scatter plot of two parameters Method to generate a scatter plot with histograms for two parameters in the database. Parameters ---------- param1 : str name of parameter 1 param2 : str name of parameter 2 show_unclassified : bool, optional, default: False True is unclassified breakwaters with a coordinate must be plotted, False is unclassified breakwaters must not be plotted. exclude : list, optional, default: None list of breakwater types to exclude from the plot min_data : int, optional, default: 5 minimum number of datapoints required for plotting, if the data for a bw type is less than this limit it will be skipped xmax : float, optional, default: None maximum x coordinate of the scatter plot, by default this limit is automatically determined ymax : float, optional, default: None maximum y coordinate of the scatter plot, by default this limit is automatically determined bins_param1 : str number of bins for param2 bins_param2 : str number of bins for param2 """ # validate exclude input for type self._validate_excludes(exclude) # filter the df to remove zeros filtered_df = self._del_zeros(self.df[['type', param1, param2]]) # filter df for showing unclassified bw types filtered_df = self._show_unclassified(show_unclassified, filtered_df) # create the figure fig = plt.figure(figsize=(12,9)) gs = GridSpec(4,4) # create the scatter plot scatter_plot = fig.add_subplot(gs[1:4,0:3]) # make the histograms of the two parameters top_hist = fig.add_subplot(gs[0,0:3]) right_hist = fig.add_subplot(gs[1:4,3]) # create empty list for storing data of bw types hist_param1, hist_param2 = [], [] # create empty list to store colors for histogram colors = [] # iterate over the bw types for bw_type in filtered_df.type.unique(): # check if type is in exclude if exclude is not None and bw_type in exclude: # pass and delete type 
from exclude exclude.remove(bw_type) else: # get only the bws of the current type bws = filtered_df[filtered_df.type == bw_type] # check if there is enough data if (len(bws[param1].values) <= min_data or len(bws[param2].values) <= min_data): # not enough data, show warning user_warning( f'{bw_type} was skipped because of a lack of data') else: # enough data, plot on scatter scatter_plot.scatter( bws[param1].values, bws[param2].values, s=18, c=self._colors.get(bw_type, '#17becf'), label=bw_type, marker=self._markers.get(bw_type, 'H'), zorder=2) # add used color to the list colors.append(self._colors.get(bw_type, '#17becf')) # add to hist lists hist_param1.append(bws[param1].values) hist_param2.append(bws[param2].values) # check if all types in exclude are in the database self._validate_all_excludes(exclude) # determine x and y limits, if not specified as arguments if xmax is None: # determine xmax xmax = np.round(scatter_plot.get_xlim()[1]) if ymax is None: # determine ymax ymax = np.round(scatter_plot.get_ylim()[1]) # generate the bins param1_bins = np.linspace(0, xmax, bins_param1) param2_bins = np.linspace(0, ymax, bins_param2) # add the histograms to the plot top_hist.hist(hist_param1, bins=param1_bins, color=colors) right_hist.hist( hist_param2, bins=bins_param2, orientation='horizontal', color=colors) # set x and y lims scatter_plot.set_xlim(0, xmax) scatter_plot.set_ylim(0, ymax) top_hist.set_xlim(0, xmax) right_hist.set_ylim(0, ymax) # add grid to all plots scatter_plot.grid() top_hist.grid() right_hist.grid() # set labels scatter_plot.set_xlabel(param1.capitalize()) scatter_plot.set_ylabel(param2.capitalize()) top_hist.set_ylabel('Frequency') right_hist.set_xlabel('Frequency') # remove ticks from histograms plt.setp(top_hist.get_xticklabels(), visible=False) plt.setp(right_hist.get_yticklabels(), visible=False) # add legend, set tight_layout and show plot scatter_plot.legend() plt.tight_layout() plt.show()
[docs] def hist( self, param, show_unclassified=False, exclude=None, min_data=5, xmax=None, bins=10): """ Plot a histogram of a parameter Parameters ---------- param : str name of the parameter show_unclassified : bool, optional, default: False True is unclassified breakwaters with a coordinate must be plotted, False is unclassified breakwaters must not be plotted. exclude : list, optional, default: None list of breakwater types to exclude from the plot min_data : int, optional, default: 5 minimum number of datapoints required for plotting, if the data for a bw type is less than this limit it will be skipped xmax : float, optional, default: None maximum x coordinate of the histogram, by default this limit is automatically determined bins : int, optional, default: 10 number of bins """ # validate exclude input for type self._validate_excludes(exclude) # remove the zero values from the df df = self.df[['type', param]] df = df[df[param] != 0] # filter df for showing unclassified bw types filtered_df = self._show_unclassified(show_unclassified, df) # create lists to store labels, data and colors labels, data, colors = [], [], [] # set variable to track maximum x xmax_computed = 0 # iterate over the types in the df to get the data for bw_type in filtered_df.type.unique(): # check if type is in exclude if exclude is not None and bw_type in exclude: # pass and delete type from exclude exclude.remove(bw_type) else: # get only the bws of the current type bws = filtered_df[filtered_df.type == bw_type] # check if there is enough data if len(bws[param].values) <= min_data: # not enough data, show warning user_warning( f'{bw_type} was skipped because of a lack of data') else: # enough data, plot on scatter # add used color, data and label to the list labels.append(bw_type) data.append(bws[param].values) colors.append(self._colors.get(bw_type, '#17becf')) # get maximum value if np.max(bws[param].values) >= xmax_computed: xmax_computed = np.max(bws[param].values) # check if all 
types in exclude are in the database self._validate_all_excludes(exclude) # determine x limit, if not specified as arguments if xmax is None: # determine xmax xmax = np.round(xmax_computed) # generate the bins generated_bins = np.linspace(0, xmax, bins) # plot histogram plt.hist(data, bins=generated_bins, label=labels, color=colors) # format axis plt.xlim(0, xmax) plt.xlabel(f'{param}') plt.ylabel('Frequency') # set other lay-out plt.title(f'Frequency Histogram of {param}') plt.grid() plt.legend() plt.tight_layout() plt.show()
def _validate_plot_vals(param, val, given_val): """ Validate the values to plot """ if val == 0: # check if val is given if given_val != 0: # return specified value return given_val else: # raise error raise ValueError( (f'No value for {param} in the database, use arguments to ' 'specify custom value')) else: # return value return val