Classes
Normality_distribution
Compute and visualise normality distributions for NMR metabolomics data.
Parameters
data : pd.DataFrame
    Feature matrix where rows are samples and columns are spectral features.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(20, 100))
>>> nd = metbit.Normality_distribution(X)
>>> fig = nd.plot_distribution(X.columns[0])
Methods
__init__(self, data: pd.DataFrame)
plot_distribution(self, feature: str)
Plot histogram and Q-Q plot for a single spectral feature.
Parameters
feature : str
    Column name of the feature to visualise.
Returns
matplotlib.pyplot
    The pyplot module after rendering both plots.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(20, 50), columns=[f"f{i}" for i in range(50)])
>>> nd = metbit.Normality_distribution(X)
>>> fig = nd.plot_distribution("f0")
>>> fig.show()
pca_distributions(self)
Plot histogram and Q-Q plot for the first two PCA score components.
Returns
matplotlib.pyplot
    The pyplot module after rendering both plots.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(30, 100))
>>> nd = metbit.Normality_distribution(X)
>>> fig = nd.pca_distributions()
>>> fig.show()
Normalise
Apply various normalisation strategies to a metabolomics feature matrix.
Parameters
data : pd.DataFrame
    Feature matrix where rows are samples and columns are spectral features.
compute_missing : bool, optional
    If True (default), impute missing values with KNN before normalising.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(20, 100))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> norm_df = norm.pqn_normalise(plot=False)
Methods
__init__(self, data: pd.DataFrame, compute_missing: bool=True)
pqn_normalise(self, ref_index: list=None, plot: bool=True)
Return the normalised dataframe using the Probabilistic Quotient Normalisation (PQN) method.
Parameters
ref_index : list, optional
    Row indices to use as the reference set for computing the median spectrum. If None, all samples are used.
plot : bool, optional
    Whether to plot the histograms of normalisation factors and fold changes.
Returns
pd.DataFrame
    PQN-normalised feature matrix with the same index and columns as the input.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(20, 50))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> norm_df = norm.pqn_normalise(plot=False)
>>> norm_df.shape
(20, 50)
decimal_place_normalisation(self, decimals: int=2)
Return the dataframe with values rounded to a specified number of decimal places.
Parameters
decimals : int, optional
    The number of decimal places to round to. Default is 2.
Returns
pd.DataFrame
    Rounded feature matrix.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> rounded_df = norm.decimal_place_normalisation(decimals=3)
z_score_normalisation(self)
Return the dataframe normalised using Z-Score standardisation.
Returns
pd.DataFrame
    Z-score normalised feature matrix.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> z_df = norm.z_score_normalisation()
linear_normalisation(self)
Return the dataframe normalised using Min-Max (linear) normalisation.
Returns
pd.DataFrame
    Min-Max normalised feature matrix with values in [0, 1].
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> linear_df = norm.linear_normalisation()
normalize_to_100(self)
Return the dataframe with each column normalised to sum to 100.
Returns
pd.DataFrame
    Feature matrix in which the values of each column sum to 100.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> norm100_df = norm.normalize_to_100()
clipping_normalisation(self, lower: float, upper: float)
Return the dataframe with values clipped to the specified range.
Parameters
lower : float
    The lower bound; values below this are set to ``lower``.
upper : float
    The upper bound; values above this are set to ``upper``.
Returns
pd.DataFrame
    Clipped feature matrix.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> clipped_df = norm.clipping_normalisation(lower=0.1, upper=0.9)
standard_deviation_normalisation(self)
Return the dataframe normalised using mean-centring and standard deviation scaling.
Returns
pd.DataFrame
    Mean-centred, SD-scaled feature matrix.
Examples
>>> import pandas as pd
>>> import numpy as np
>>> import metbit
>>> X = pd.DataFrame(np.random.rand(10, 20))
>>> norm = metbit.Normalise(X, compute_missing=False)
>>> sd_df = norm.standard_deviation_normalisation()
Functions
project_name_generator()
Generate a unique project name by combining a timestamp with a random codename.
Returns
str
    A string of the form ``'YYYYMMDDHHMMSSmss_RandomCodename'``.
Examples
>>> import metbit
>>> name = metbit.project_name_generator()
>>> isinstance(name, str)
True