Source code for tsgm.utils.visualization

import sklearn
import sklearn.manifold
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
import tensorflow as tf

import tsgm


DEFAULT_PALETTE_TSNE = {"hist": "red", "gen": "blue"}


def visualize_dataset(
    dataset: tsgm.dataset.DatasetOrTensor,
    obj_id: int = 0,
    palette: dict = DEFAULT_PALETTE_TSNE,
    path: str = "/tmp/generated_data.pdf",
) -> None:
    """
    Visualizes a time series dataset together with its target values.

    :param dataset: A time series dataset.
    :type dataset: tsgm.dataset.DatasetOrTensor
    :param obj_id: Index of the sample to visualize. Defaults to 0.
    :type obj_id: int, optional
    :param palette: A dictionary mapping labels to colors. Defaults to DEFAULT_PALETTE_TSNE (currently unused by this function).
    :type palette: dict, optional
    :param path: The path to save the visualization as a PDF file. Defaults to "/tmp/generated_data.pdf".
    :type path: str, optional
    """
    plt.figure(num=None, figsize=(8, 4), dpi=80, facecolor="w", edgecolor="k")
    if isinstance(dataset, tsgm.dataset.Dataset):
        X = dataset.X
        y = dataset.y
    elif isinstance(dataset, np.ndarray) or tf.is_tensor(dataset):
        X = dataset
        y = None
    else:
        raise ValueError("`dataset` has an unknown type")

    T = X.shape[-1]
    # Plot up to three of the last features of the selected sample.
    if X.shape[1] >= 1:
        sns.lineplot(x=np.arange(0, T, 1), y=X[obj_id, -1], label="Feature #1")
    if X.shape[1] >= 2:
        sns.lineplot(x=np.arange(0, T, 1), y=X[obj_id, -2], label="Feature #2")
    if X.shape[1] >= 3:
        sns.lineplot(x=np.arange(0, T, 1), y=X[obj_id, -3], label="Feature #3")
    plt.xlabel("Time")
    plt.ylabel("Absolute value (measurements)")
    if y is not None:
        print([int(el) for el in y[obj_id]])
        plt.ylabel("Target value (y)")
    plt.title("Generated data")
    plt.savefig(path)
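
# A minimal usage sketch (not part of the original module): feeds a random tensor
# shaped (n_samples, n_features, n_timesteps) to `visualize_dataset`. The helper
# name, shapes, and output path are assumptions for illustration only.
def _demo_visualize_dataset() -> None:
    X_demo = np.random.normal(size=(16, 3, 64))  # 16 samples, 3 features, 64 timesteps
    visualize_dataset(X_demo, obj_id=0, path="/tmp/demo_dataset.pdf")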
def visualize_tsne_unlabeled(
    X: tsgm.types.Tensor,
    X_gen: tsgm.types.Tensor,
    palette: dict = DEFAULT_PALETTE_TSNE,
    alpha: float = 0.25,
    path: str = "/tmp/tsne_embeddings.pdf",
    fontsize: int = 20,
    markerscale: int = 3,
    markersize: int = 1,
    feature_averaging: bool = False,
    perplexity: float = 30.0,
) -> None:
    """
    Visualizes t-SNE embeddings of unlabeled data.

    :param X: The original data tensor of shape (num_samples, num_timesteps, num_features).
    :type X: tsgm.types.Tensor
    :param X_gen: The generated data tensor of shape (num_samples, num_timesteps, num_features).
    :type X_gen: tsgm.types.Tensor
    :param palette: A dictionary mapping the point categories ("hist", "gen") to colors. Defaults to DEFAULT_PALETTE_TSNE.
    :type palette: dict, optional
    :param alpha: The transparency level of the plotted points. Defaults to 0.25.
    :type alpha: float, optional
    :param path: The path to save the visualization as a PDF file. Defaults to "/tmp/tsne_embeddings.pdf".
    :type path: str, optional
    :param fontsize: The font size of the class labels in the legend. Defaults to 20.
    :type fontsize: int, optional
    :param markerscale: The scaling factor for the size of the markers in the legend. Defaults to 3.
    :type markerscale: int, optional
    :param markersize: The size of the markers in the scatter plot. Defaults to 1.
    :type markersize: int, optional
    :param feature_averaging: Whether to average over the feature axis before embedding. Defaults to False.
    :type feature_averaging: bool, optional
    :param perplexity: The perplexity parameter passed to t-SNE. Defaults to 30.0.
    :type perplexity: float, optional
    """
    tsne = sklearn.manifold.TSNE(n_components=2, perplexity=perplexity, learning_rate="auto", init="random")
    point_styles = ["hist"] * X.shape[0] + ["gen"] * X_gen.shape[0]

    if feature_averaging:
        X_all = np.concatenate((np.mean(X, axis=2), np.mean(X_gen, axis=2)))
        X_emb = tsne.fit_transform(np.resize(X_all, (X_all.shape[0], X_all.shape[1])))
    else:
        X_all = np.concatenate((X, X_gen))
        X_emb = tsne.fit_transform(
            np.resize(X_all, (X_all.shape[0], X_all.shape[1] * X_all.shape[2]))
        )

    plt.figure(figsize=(8, 6), dpi=80)
    sns.scatterplot(
        x=X_emb[:, 0],
        y=X_emb[:, 1],
        hue=point_styles,
        style=point_styles,
        markers={"hist": "<", "gen": "H"},
        palette=palette,
        alpha=alpha,
        s=markersize,
    )
    plt.box(False)
    plt.axis("off")
    plt.tight_layout()
    plt.legend(
        bbox_to_anchor=(1, 1),
        loc=1,
        borderaxespad=0,
        fontsize=fontsize,
        markerscale=markerscale,
    )
    plt.savefig(path)
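
# A minimal usage sketch (assumption, not library code): compares 100 real and
# 100 synthetic series of shape (timesteps, features) via t-SNE. The total
# sample count must exceed `perplexity` for sklearn's TSNE to run.
def _demo_visualize_tsne_unlabeled() -> None:
    X_real = np.random.normal(size=(100, 64, 2))
    X_synth = np.random.normal(size=(100, 64, 2))
    visualize_tsne_unlabeled(X_real, X_synth, perplexity=30.0, path="/tmp/demo_tsne_unlabeled.pdf")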
def visualize_tsne(
    X: tsgm.types.Tensor,
    y: tsgm.types.Tensor,
    X_gen: tsgm.types.Tensor,
    y_gen: tsgm.types.Tensor,
    path: str = "/tmp/tsne_embeddings.pdf",
    feature_averaging: bool = False,
    perplexity: float = 30.0,
) -> None:
    """
    Visualizes t-SNE embeddings of real and synthetic data.

    This function generates a scatter plot of t-SNE embeddings for real and synthetic data.
    The marker colors correspond to the class labels of the data points, and the marker
    shapes distinguish real ("hist") from synthetic ("gen") samples.

    :param X: The original real data tensor of shape (num_samples, num_timesteps, num_features).
    :type X: tsgm.types.Tensor
    :param y: One-hot encoded labels of the real data, of shape (num_samples, num_classes).
    :type y: tsgm.types.Tensor
    :param X_gen: The generated synthetic data tensor of shape (num_samples, num_timesteps, num_features).
    :type X_gen: tsgm.types.Tensor
    :param y_gen: One-hot encoded labels of the synthetic data, of shape (num_samples, num_classes).
    :type y_gen: tsgm.types.Tensor
    :param path: The path to save the visualization as a PDF file. Defaults to "/tmp/tsne_embeddings.pdf".
    :type path: str, optional
    :param feature_averaging: Whether to average over the feature axis before embedding. Defaults to False.
    :type feature_averaging: bool, optional
    :param perplexity: The perplexity parameter passed to t-SNE. Defaults to 30.0.
    :type perplexity: float, optional
    """
    tsne = sklearn.manifold.TSNE(n_components=2, perplexity=perplexity, learning_rate="auto", init="random")

    if feature_averaging:
        X_all = np.concatenate((np.mean(X, axis=2), np.mean(X_gen, axis=2)))
        X_emb = tsne.fit_transform(np.resize(X_all, (X_all.shape[0], X_all.shape[1])))
    else:
        X_all = np.concatenate((X, X_gen))
        X_emb = tsne.fit_transform(
            np.resize(X_all, (X_all.shape[0], X_all.shape[1] * X_all.shape[2]))
        )

    y_all = np.concatenate((y, y_gen))
    c = np.argmax(y_all, axis=1)
    # Note: only two classes are supported by this mapping.
    colors = {0: "class 0", 1: "class 1"}
    c = [colors[el] for el in c]
    point_styles = ["hist"] * X.shape[0] + ["gen"] * X_gen.shape[0]

    plt.figure(figsize=(8, 6), dpi=80)
    sns.scatterplot(
        x=X_emb[:, 0],
        y=X_emb[:, 1],
        hue=c,
        style=point_styles,
        markers={"hist": "<", "gen": "H"},
        alpha=0.7,
    )
    plt.legend()
    plt.box(False)
    plt.axis("off")
    plt.savefig(path)
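
# A minimal usage sketch (assumption): labels are expected one-hot encoded with
# two classes, since the function maps argmax values 0/1 to "class 0"/"class 1".
# The demo function name and shapes are illustrative only.
def _demo_visualize_tsne() -> None:
    X_real = np.random.normal(size=(100, 64, 2))
    X_synth = np.random.normal(size=(100, 64, 2))
    y_real = np.eye(2)[np.random.randint(0, 2, size=100)]
    y_synth = np.eye(2)[np.random.randint(0, 2, size=100)]
    visualize_tsne(X_real, y_real, X_synth, y_synth, path="/tmp/demo_tsne.pdf")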
def visualize_ts(ts: tsgm.types.Tensor, num: int = 5) -> None:
    """
    Visualizes a time series tensor.

    This function generates a plot to visualize time series data. It displays a specified
    number of time series from the input tensor.

    :param ts: The time series data tensor of shape (num_samples, num_timesteps, num_features).
    :type ts: tsgm.types.Tensor
    :param num: The number of time series to display. Defaults to 5.
    :type num: int, optional

    Raises:
        AssertionError: If the input tensor does not have three dimensions.

    Example:
        >>> visualize_ts(time_series_tensor, num=10)
    """
    assert len(ts.shape) == 3

    fig, axs = plt.subplots(num, 1, figsize=(14, 10))
    if num == 1:
        axs = [axs]

    # Randomly select `num` samples and show each as a (features x timesteps) image.
    ids = np.random.choice(ts.shape[0], size=num, replace=False)
    for i, sample_id in enumerate(ids):
        axs[i].imshow(ts[sample_id].T, aspect="auto")
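
# A minimal usage sketch (assumption): `ts` must be 3D, e.g. 20 samples with
# 64 timesteps and 3 features; `num` must not exceed the number of samples.
def _demo_visualize_ts() -> None:
    ts_demo = np.random.normal(size=(20, 64, 3))
    visualize_ts(ts_demo, num=5)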
def visualize_ts_lineplot(
    ts: tsgm.types.Tensor,
    ys: tsgm.types.OptTensor = None,
    num: int = 5,
    unite_features: bool = True,
    legend_fontsize: int = 12,
    tick_size: int = 10,
) -> None:
    """
    Visualizes time series data using line plots.

    This function generates line plots to visualize the time series data. It randomly selects
    a specified number of samples from the input tensor `ts` and plots each sample as a line plot.
    If `ys` is provided, it can be either a 1D or 2D tensor representing the target variable(s),
    and the function will overlay it on the line plot.

    :param ts: Input time series data tensor.
    :type ts: tsgm.types.Tensor
    :param ys: Optional target variable(s) tensor, defaults to None.
    :type ys: tsgm.types.OptTensor, optional
    :param num: Number of samples to visualize, defaults to 5.
    :type num: int, optional
    :param unite_features: Whether to plot all features together or only one randomly chosen feature, defaults to True.
    :type unite_features: bool, optional
    :param legend_fontsize: Font size of the legend (also used for subplot titles when `ys` is 1D).
    :type legend_fontsize: int, optional
    :param tick_size: Font size of the y-axis ticks.
    :type tick_size: int, optional
    """
    assert len(ts.shape) == 3

    fig, axs = plt.subplots(num, 1, figsize=(14, 10))
    if num == 1:
        axs = [axs]

    ids = np.random.choice(ts.shape[0], size=num, replace=False)
    for i, sample_id in enumerate(ids):
        if not unite_features:
            # Plot a single randomly chosen feature of the sample.
            feature_id = np.random.randint(ts.shape[2])
            sns.lineplot(
                x=range(ts.shape[1]),
                y=ts[sample_id, :, feature_id],
                ax=axs[i],
                label=rf"feature \#{feature_id}",
            )
        else:
            # Plot every feature of the sample on the same axis.
            for feat_id in range(ts.shape[2]):
                sns.lineplot(
                    x=range(ts.shape[1]),
                    y=ts[sample_id, :, feat_id],
                    ax=axs[i],
                    label="Generated",
                )
        if ys is not None:
            axs[i].tick_params(labelsize=tick_size, which="both")
            if len(ys.shape) == 1:
                axs[i].set_title(ys[sample_id], fontsize=legend_fontsize)
            elif len(ys.shape) == 2:
                # Overlay the condition on a secondary y-axis.
                ax2 = axs[i].twinx()
                sns.lineplot(
                    x=range(ts.shape[1]),
                    y=ys[sample_id],
                    ax=ax2,
                    color="g",
                    label="Condition",
                )
                # axs[i].twinx().yaxis.set_ticks_position('right')
                ax2.tick_params(labelsize=tick_size)
                if i == 0:
                    leg = ax2.legend(fontsize=legend_fontsize, loc="upper right")
                    for legobj in leg.legendHandles:
                        legobj.set_linewidth(2.0)
                else:
                    ax2.get_legend().remove()
            else:
                raise ValueError("ys contains too many dimensions")
        if i == 0:
            leg = axs[i].legend(fontsize=legend_fontsize, loc="upper left")
            for legobj in leg.legendHandles:
                legobj.set_linewidth(2.0)
        else:
            axs[i].get_legend().remove()
        if i != len(ids) - 1:
            axs[i].set_xticks([])
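
# A minimal usage sketch (assumption): overlays a 2D condition tensor with one
# value per timestep on each randomly selected sample.
def _demo_visualize_ts_lineplot() -> None:
    ts_demo = np.random.normal(size=(20, 64, 3))
    ys_demo = np.random.normal(size=(20, 64))
    visualize_ts_lineplot(ts_demo, ys=ys_demo, num=3)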
def visualize_original_and_reconst_ts(
    original: tsgm.types.Tensor,
    reconst: tsgm.types.Tensor,
    num: int = 5,
    vmin: int = 0,
    vmax: int = 1,
) -> None:
    """
    Visualizes original and reconstructed time series data.

    This function generates side-by-side visualizations of the original and reconstructed time series data.
    It randomly selects a specified number of samples from the input tensors `original` and `reconst` and
    displays them as images using imshow.

    :param original: Original time series data tensor.
    :type original: tsgm.types.Tensor
    :param reconst: Reconstructed time series data tensor.
    :type reconst: tsgm.types.Tensor
    :param num: Number of samples to visualize, defaults to 5.
    :type num: int, optional
    :param vmin: Minimum value for colormap normalization, defaults to 0.
    :type vmin: int, optional
    :param vmax: Maximum value for colormap normalization, defaults to 1.
    :type vmax: int, optional
    """
    assert original.shape == reconst.shape

    fig, axs = plt.subplots(num, 2, figsize=(14, 10))
    ids = np.random.choice(original.shape[0], size=num, replace=False)
    for i, sample_id in enumerate(ids):
        # Left column: original sample; right column: its reconstruction.
        axs[i, 0].imshow(original[sample_id].T, aspect="auto", vmin=vmin, vmax=vmax)
        axs[i, 1].imshow(reconst[sample_id].T, aspect="auto", vmin=vmin, vmax=vmax)
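
# A minimal usage sketch (assumption): compares a noisy "reconstruction" with the
# original series; values roughly in [0, 1] match the default vmin/vmax.
def _demo_visualize_original_and_reconst_ts() -> None:
    original = np.random.uniform(size=(20, 64, 3))
    reconst = np.clip(original + np.random.normal(scale=0.05, size=original.shape), 0, 1)
    visualize_original_and_reconst_ts(original, reconst, num=4)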
def visualize_training_loss(
    loss_vector: tsgm.types.Tensor,
    labels: tuple = (),
    path: str = "/tmp/training_loss.pdf",
) -> None:
    """
    Plots training losses as a function of training epochs.

    :param loss_vector: np.array of shape (num_metrics, num_epochs).
    :param labels: Tuple of strings, one label per metric.
    :param path: str, where to save the plot.
    """
    num_of_metrics = loss_vector.shape[0]
    num_of_epochs = loss_vector[0].shape[0]
    colors = [
        {"color": "orange", "linewidth": 1, "alpha": 0.8},
        {"color": "darkorchid"},
        {"color": "pink"},
        {"color": "blue"},
        {"color": "red"},
        {"color": "green"},
        {"color": "black", "linewidth": 2},
    ]
    fig, ax = plt.subplots(1, 1, figsize=(12, 5))
    for i in range(num_of_metrics):
        label = labels[i] if i < len(labels) else ""
        loss = loss_vector[i]
        # Scale the loss to the range [0, 0.xxx] and record the scaling factor in the label.
        max_magnitude = math.floor(math.log10(np.max(loss)))
        if max_magnitude >= 0:
            exp = max_magnitude + 1
            loss = loss / 10 ** exp  # avoid mutating the caller's array in place
            label += f" ($10^{exp}$)"
        if i < len(colors):
            # Use the predefined styles while they are available; fall back to defaults afterwards.
            ax.plot(range(num_of_epochs), loss, label=label, **colors[i])
        else:
            ax.plot(range(num_of_epochs), loss, label=label)
    plt.legend()
    # Hide the right and top spines.
    ax.spines.right.set_visible(False)
    ax.spines.top.set_visible(False)
    # Only show ticks on the left and bottom spines.
    ax.yaxis.set_ticks_position("left")
    ax.xaxis.set_ticks_position("bottom")
    plt.savefig(path, dpi=80)
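
# A minimal usage sketch (assumption): two strictly positive loss curves over
# 100 epochs; positivity matters because the scaling uses log10 of the maximum.
def _demo_visualize_training_loss() -> None:
    epochs = np.arange(100)
    losses = np.stack([
        np.exp(-epochs / 30.0) + 0.1,
        5.0 * np.exp(-epochs / 50.0) + 0.5,
    ])
    visualize_training_loss(losses, labels=("generator", "discriminator"), path="/tmp/demo_loss.pdf")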