# Source code for pytom3d.util

import os
import functools
import glob
import pickle
import numpy as np
import pandas as pd
import re
from typing import Tuple, List, Any
from matplotlib import pyplot as plt

from pytom3d.stats import running_mean, running_std


def export_regressor(regressor, folder: str = "./", filename: str = "my_regressor",
                     extension: str = ".rg", excluded_keys: List[str] = [],
                     forced_values: List[Any] = []) -> dict:
    """
    Export the attributes of a regressor to a dictionary and pickle it to disk.

    Parameters
    ----------
    regressor : object
        The regressor object to export. Its attributes are read via ``vars``.
    folder : str, optional
        Folder prefix of the output file. Default is "./".
    filename : str, optional
        Name of the output file, without extension. Default is "my_regressor".
    extension : str, optional
        Extension of the output file. Default is ".rg".
    excluded_keys : list of str, optional
        Attribute names whose stored value must be replaced.
    forced_values : list of any, optional
        Replacement values, positionally paired with `excluded_keys`.

    Returns
    -------
    dict
        The dictionary of attributes that was written to disk.

    Raises
    ------
    AssertionError
        If the length of `excluded_keys` is not equal to the length of
        `forced_values`.

    Notes
    -----
    This function is provided for convenience to handle backward compatibility
    of scikit-learn. The list defaults are only read, never mutated.

    """
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
    if len(excluded_keys) != len(forced_values):
        raise AssertionError("`excluded_keys` and `forced_values` must have the same length.")

    # Build the override table once; the first occurrence of a repeated key wins,
    # which is what a `list.index` lookup would select.
    overrides = {}
    for key, value in zip(excluded_keys, forced_values):
        overrides.setdefault(key, value)

    regressor_dict = {key: overrides.get(key, value)
                      for key, value in vars(regressor).items()}

    save(regressor_dict, folder, filename, extension)
    return regressor_dict
def import_regressor(folder: str = "./", filename: str = "my_regressor.rg", init_regressor = None) -> dict:
    """
    Import a regressor's attributes from a saved file and update an initialized regressor object.

    Parameters
    ----------
    folder : str, optional
        Folder prefix of the saved attributes file. Default is "./".
    filename : str, optional
        Name of the saved attributes file, extension included.
        Default is "my_regressor.rg".
    init_regressor : object, optional
        The initialized regressor object to be updated with the imported
        attributes. When left as None, nothing is updated and the attributes
        are only returned.

    Returns
    -------
    dict
        A dictionary containing the imported regressor attributes.

    Notes
    -----
    This function is provided for convenience to handle backward compatibility
    of scikit-learn.

    """
    # `load` takes one full path. Folder and file name are concatenated the
    # same way `save` builds its output path, so export/import round-trip.
    regressor_dict = load(folder + filename)

    if init_regressor is not None:
        for key, value in regressor_dict.items():
            setattr(init_regressor, key, value)

    return regressor_dict
def trials(regressor, mesh, n: int = 1, folder: str = "./") -> None:
    """
    Generate and save trial data from a regressor's prediction.

    One text file per trial is written to ``folder + mesh.name + "_<h>.txt"``,
    with ``h`` running from 1 to `n`. Each file has five columns: the three
    columns of ``mesh.P``, the prediction, and a noise term. The noise term is
    zero for the first trial and a normal sample scaled by the predicted
    standard deviation for the following ones.

    Parameters
    ----------
    regressor :
        The regressor of the topography. Must expose
        ``predict(X, return_std=True)``.
    mesh : Topography
        The topography object; its ``P`` array and ``name`` are used.
    n : int, optional
        Number of trials to generate (default is 1).
    folder : str, optional
        The folder prefix of the trial data files (default is "./").

    Returns
    -------
    None

    """
    if n < 1:
        return

    # The prediction does not depend on the trial index: compute it once.
    pred, sigma = regressor.predict(mesh.P[:, 0:2], return_std=True)

    for h in range(1, n + 1):
        # A sample is drawn on every trial, including the first, so the random
        # stream is consumed at the same rate whatever the trial index.
        noise = np.random.normal(loc=0, scale=sigma)
        if h == 1:
            # The first trial is the noise-free reference.
            noise = np.zeros_like(noise)

        output = np.vstack([mesh.P[:, 0], mesh.P[:, 1], mesh.P[:, 2], pred, noise]).T
        np.savetxt(folder + mesh.name + "_" + str(h) + ".txt", output)
def predict_at_node(xx, yy, regressor):
    """
    Return the value stored at a specific node of a tabulated prediction.

    Parameters
    ----------
    xx : float
        The x-coordinate of the node, compared against column 0.
    yy : float
        The y-coordinate of the node, compared against column 1.
    regressor : numpy.ndarray
        Two-dimensional array with one row per node. Columns 3 and 4 are
        summed to form the returned value.

    Returns
    -------
    float
        The sum of columns 3 and 4 of the matching row.

    Raises
    ------
    Exception
        If there is not exactly one node matching the specified coordinates.

    """
    node_id = np.where(np.isclose(regressor[:, 0], xx, atol=1e-8)
                       & np.isclose(regressor[:, 1], yy, atol=1e-8))[0]

    # Validate before indexing: with no match, indexing first would raise an
    # IndexError instead of the documented error.
    if len(node_id) != 1:
        raise Exception("There must be only one node.")

    node = regressor[node_id[0]]
    return node[3] + node[4]
def prediction_wrapper(regressor, x, y) -> Tuple[np.ndarray]:
    """
    Evaluate a regressor at a single (x, y) location.

    Parameters
    ----------
    regressor : Regressor
        Model exposing ``predict(X, return_std=True)``.
    x : float
        The x-coordinate of the query point.
    y : float
        The y-coordinate of the query point.

    Returns
    -------
    tuple
        The first element of the predicted values and the first element of
        the associated standard deviations.

    """
    coordinates = np.array([x, y])
    # The regressor is queried with a single row.
    query = coordinates.reshape(1, -1)
    mean, std = regressor.predict(query, return_std=True)
    return mean[0], std[0]
def save(obj, folder: str = "./", filename: str = "my_file", extension: str = ".bin") -> None:
    """
    Pickle an object to a binary file.

    The destination is the plain concatenation ``folder + filename + extension``,
    so `folder` must carry its own trailing separator.

    Parameters
    ----------
    obj : Any
        The object to be saved.
    folder : str, optional
        Folder prefix of the destination. Default is "./".
    filename : str, optional
        File name without extension. Default is "my_file".
    extension : str, optional
        File extension, leading dot included. Default is ".bin".

    Returns
    -------
    None

    """
    destination = folder + filename + extension
    with open(destination, 'wb') as handle:
        pickle.dump(obj, handle)
def load(path: str = "./"):
    """
    Unpickle and return the object stored in a binary file.

    Parameters
    ----------
    path : str, optional
        The path of the file. Default is "./".

    Returns
    -------
    Any
        The loaded object.

    Notes
    -----
    Unpickling executes code embedded in the file; only open files coming
    from a trusted source.

    """
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload
def list_files(folder: str = "./", extension: str = ".gpr") -> List[str]:
    """
    List the files of a folder that end with a given extension.

    Parameters
    ----------
    folder : str, optional
        Path to the folder to search for files. Default is "./".
    extension : str, optional
        File extension to filter files. Default is ".gpr".

    Returns
    -------
    List[str]
        The paths matched by the glob pattern ``folder + '/*' + extension``.

    """
    pattern = folder + '/*' + extension
    return list(glob.glob(pattern))
def gather_data(match: str, inp: List[int], out: int, path: str, *list_path: List[str]) -> None:
    """
    Load data from multiple files, extract specified columns, and save to a csv file.

    The input columns are read from the first file only and are named "x" and
    "y" in order. Each file then contributes one output column, named after
    the portion of its path matched by `match`.

    Parameters
    ----------
    match : str
        A regular expression searched in each file path; the matched text
        becomes the name of that file's output column.
    inp : List[int]
        List of column indices to extract as input features. Only two names
        ("x", "y") are available, so at most two indices are supported.
    out : int
        Index of the column to extract as the output feature.
    path : str
        Path to the output csv file.
    *list_path : List[str]
        Variable number of ``.npy`` file paths containing the data.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `match` is not found in one of the file paths.

    """
    n2c = {"0": "x", "1": "y"}

    input_cols = np.load(list_path[0])[:, inp]
    df = pd.DataFrame()
    for r in range(len(inp)):
        df.insert(r, n2c[str(r)], input_cols[:, r])

    pattern = re.compile(match)
    frames = [df]
    for p in list_path:
        print(p)
        found = pattern.search(p)
        if found is None:
            raise ValueError(f"Pattern {match!r} not found in path {p!r}.")
        frames.append(pd.DataFrame({found.group(0): np.load(p)[:, out]}))

    # One concatenation instead of one per file avoids re-copying the table.
    pd.concat(frames, axis=1).to_csv(path, index=False)
def get_coordinates(inp: List[int], *list_path: List[str]) -> np.ndarray:
    """
    Return selected columns of the first file as coordinates.

    Parameters
    ----------
    inp : List[int]
        List of column indices to extract as coordinates.
    list_path : List[str]
        Variable number of file paths containing the data. Only the first
        file path is read.

    Returns
    -------
    np.ndarray
        The requested columns of the array stored in the first file.

    """
    first_file = list_path[0]
    table = np.load(first_file)
    return table[:, inp]
def lite_dict(gpr_obj: Any):
    """
    Load a lite version of a saved regressor.

    Parameters
    ----------
    gpr_obj : Any
        Forwarded unchanged to `load`, i.e. the path of the pickled Gaussian
        Process Regressor.

    Returns
    -------
    Dict[str, Any]
        Dictionary containing the selected kernel parameters followed by the
        training data (``X_train_`` and ``y_train_``).

    Notes
    -----
    This function extracts specific kernel parameters and the training data
    of the loaded regressor for testing purposes.

    """
    gpr_ = load(gpr_obj)

    keys = ["k1__k1__constant_value", "k1__k1__constant_value_bounds",
            "k1__k2__length_scale", "k1__k2__length_scale_bounds",
            "k2__noise_level", "k2__noise_level_bounds"]

    # Query the kernel parameters once instead of once per key.
    kernel_params = gpr_.kernel_.get_params()
    params_dict = {k: kernel_params[k] for k in keys}

    params_dict["X_train_"] = gpr_.X_train_
    params_dict["y_train_"] = gpr_.y_train_
    return params_dict
def update(method: callable):
    """
    Decorate a method so the instance is refreshed and the call is logged.

    After the decorated method runs, the instance's ``edges``, ``centroid``
    and ``cardinality`` methods are called in that order, and one entry is
    appended to ``history_``.

    Parameters
    ----------
    method : callable
        The method to be decorated.

    Returns
    -------
    callable
        Decorated method. It returns None.

    Notes
    -----
    The decorated method is expected to return an iterable of pairs; the
    first item of each pair is used as key and the second as value of the
    recorded history entry.

    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs) -> None:
        """
        Run the method, refresh the derived quantities, and record history.

        Parameters
        ----------
        self : object
            Instance of the class.
        *args : tuple
            Positional arguments passed to the decorated method.
        **kwargs : dict
            Keyword arguments passed to the decorated method.

        Returns
        -------
        None

        Raises
        ------
        Any exceptions raised by the decorated method.

        """
        # what the method reports about itself
        returned_pairs = method(self, *args, **kwargs)

        # refresh edges, centroid, and cardinality, in this order
        for refresher in ("edges", "centroid", "cardinality"):
            getattr(self, refresher)()

        # one history entry per call
        event = {pair[0]: pair[1] for pair in returned_pairs}
        self.history_.append(event)

    return wrapper
def contour_data_wrapper(path: str, match: str, pop_first=True, take_first=False) -> Tuple[np.ndarray]:
    """
    Collect the data needed for a contour plot.

    Parameters
    ----------
    path : str
        Path to the directory containing data files.
    match : str
        A string used to match the desired data files.
    pop_first : bool, optional
        Forwarded to ``recursive_search``. Default is True.
    take_first : bool, optional
        Forwarded to ``recursive_search``. Default is False.

    Returns
    -------
    Tuple[np.ndarray]
        The flattened x-coordinates (column 0), the flattened y-coordinates
        (column 1), the running mean, and the running standard deviation.

    """
    search_options = {"match": match, "pop_first": pop_first, "take_first": take_first}
    files = recursive_search(path, **search_options)

    # NOTE(review): the leading 3 is presumably the column holding the
    # quantity of interest -- confirm against pytom3d.stats.
    mean = running_mean(3, None, *files)
    std = running_std(3, None, 1, *files)

    x = get_coordinates([0], *files)
    y = get_coordinates([1], *files)
    return x.reshape(-1), y.reshape(-1), mean, std
def scan_data_wrapper(path: str, match: str, pop_first=True, take_first=False) -> Tuple[np.ndarray]:
    """
    Collect the data needed for a scan plot.

    Parameters
    ----------
    path : str
        Path to the directory containing data files.
    match : str
        A string used to match the desired data files.
    pop_first : bool, optional
        Forwarded to ``recursive_search``. Default is True.
    take_first : bool, optional
        Forwarded to ``recursive_search``. Default is False.

    Returns
    -------
    Tuple[np.ndarray]
        The flattened x-coordinates (column 0), the running mean, and the
        running standard deviation.

    """
    search_options = {"match": match, "pop_first": pop_first, "take_first": take_first}
    files = recursive_search(path, **search_options)

    # NOTE(review): the leading 3 is presumably the column holding the
    # quantity of interest -- confirm against pytom3d.stats.
    mean = running_mean(3, None, *files)
    std = running_std(3, None, 1, *files)

    abscissa = get_coordinates([0], *files)
    return abscissa.reshape(-1), mean, std
def printer(func: callable):
    """
    Decorate a plotting method so its figure is either saved or shown.

    Borrowed from https://github.com/aletgn/b-fade/blob/master/src/bfade/util.py

    The wrapped method must return a ``(figure, title)`` pair. When the
    instance's ``save`` attribute equals True, the figure is written to
    ``folder + title + "." + fmt`` using the instance's ``fmt`` and ``dpi``;
    otherwise the figure is displayed.

    Parameters
    ----------
    func : callable
        The method to be decorated, which generates a figure and a title.

    Returns
    -------
    callable
        The decorated method. It returns None.

    """
    @functools.wraps(func)  # keep the name and docstring of the wrapped method
    def saver(self, *args, **kwargs):
        fig, title = func(self, *args, **kwargs)

        if not (self.save == True):
            print(f"SHOW: {title}")
            plt.show()
            return

        destination = self.folder + title + "." + self.fmt
        fig.savefig(destination, format=self.fmt, dpi=self.dpi, bbox_inches='tight')
        print(f"SAVE: {title}")

    return saver