Classes
pca
PCA model
Parameters
X : array-like, shape (n_samples, n_features) — Training data, where n_samples is the number of samples and n_features is the number of features.
label : array-like, shape (n_samples,), default=None — Target data, where n_samples is the number of samples.
features_name : array-like, shape (n_features,), default=None — Names of the features.
n_components : int, default=2 — Number of components to keep.
scaling_method : str, default='pareto' — Method of scaling: 'pareto' for Pareto scaling, 'mean' for mean centering, 'uv' for unit-variance scaling.
random_state : int, default=42 — Random state for permutation test.
test_size : float, default=0.3 — Size of test set.
Examples:
import pandas as pd
import numpy as np
from metbit import pca

# Create a dataset
data = pd.DataFrame(np.random.rand(500, 50000))
class_ = pd.Series(np.random.choice(['A', 'B', 'C'], 500), name='Group')
time = pd.Series(np.random.choice(['1-wk', '2-wk', '3-wk', '4-wk'], 500), name='Time point')
# Combine the metadata and the data so that the first two columns are 'Group' and 'Time point'
datasets = pd.concat([class_, time, data], axis=1)

# Assign X and target
X = datasets.iloc[:, 2:]
y = datasets['Group']
time = datasets['Time point']
features_name = list(X.columns.astype(float))
## Perform PCA model
pca_mod = pca(X=X, label=y, features_name=features_name, n_components=2, scaling_method='pareto', random_state=42, test_size=0.3)
pca_mod.fit()

# Visualisation of PCA model
pca_mod.plot_observe_variance()
pca_mod.plot_cumulative_observed()
shape_ = {'1-wk': 'circle', '2-wk': 'square', '3-wk': 'diamond', '4-wk': 'cross'}
pca_mod.plot_pca_scores(symbol_=time, symbol_dict=shape_)
pca_mod.plot_loading_()
pca_mod.plot_pca_trajectory(time_=time, time_order={'1-wk': 0, '2-wk': 1, '3-wk': 2, '4-wk': 3}, color_dict={'A': '#636EFA', 'B': '#EF553B', 'C': '#00CC96'}, symbol_dict=shape_)
Methods
__init__(self, X: Union[pd.DataFrame, np.ndarray], label: Optional[Union[pd.Series, np.ndarray, List[Any]]]=None, features_name: Optional[Union[pd.Series, np.ndarray, List[Any]]]=None, n_components: int=2, scaling_method: str='pareto', random_state: int=42, test_size: float=0.3)
fit(self)
Fit the PCA model to the data.
Returns
pca_model : sklearn.decomposition.PCA — Fitted PCA model.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> fitted = model.fit()
get_explained_variance(self)
Return the explained variance dataframe.
Returns
df_explained_variance_ : pd.DataFrame — DataFrame containing PC labels, explained variance, and cumulative variance.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> df_ev = model.get_explained_variance()
get_scores(self)
Return the PCA scores dataframe.
Returns
df_scores_ : pd.DataFrame — DataFrame containing PC scores and group labels.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> df_scores = model.get_scores()
get_loadings(self)
Return the PCA loadings dataframe.
Returns
df_loadings_ : pd.DataFrame — DataFrame containing PC loadings for each feature.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> df_loadings = model.get_loadings()
get_q2_test(self)
Return the Q2 score computed on the held-out test set.
Returns
q2_test : float — Q2 score for the test set reconstruction.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> q2 = model.get_q2_test()
plot_observe_variance(self, fig_height: int=600, fig_width: int=800, font_size: int=15)
Visualise explained variance plot
Returns
fig : plotly.graph_objects.Figure — Explained variance plot.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> fig = model.plot_observe_variance()
>>> fig.show()
plot_cumulative_observed(self, fig_height: int=600, fig_width: int=800, font_size: int=15, marker_size: int=10)
Visualise cumulative variance plot
Returns
fig : plotly.graph_objects.Figure — Cumulative variance plot.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> fig = model.plot_cumulative_observed()
>>> fig.show()
plot_pca_scores(self, pc: List[str]=['PC1', 'PC2'], color_: Optional[pd.Series]=None, color_dict: Optional[dict]=None, symbol_: Optional[pd.Series]=None, symbol_dict: Optional[dict]=None, marker_label: Optional[pd.Series]=None, fig_height: int=900, fig_width: int=1300, marker_size: int=35, marker_opacity: float=0.7, font_size: int=20, title_font_size: int=21, individual_ellipse: bool=True, legend_name: List[str]=['Group', 'Time point'])
Visualise PCA scores plot
Parameters
pc : list, default=['PC1', 'PC2'] — List of principal components to plot.
color_ : array-like, shape (n_samples,), default=None — Target data, where n_samples is the number of samples.
color_dict : dict, default=None — Dictionary of color_ mapping.
symbol_ : array-like, shape (n_samples,), default=None — Target data, where n_samples is the number of samples.
symbol_dict : dict, default=None — Dictionary of symbol_ mapping.
fig_height : int, default=900 — Height of figure.
fig_width : int, default=1300 — Width of figure.
marker_size : int, default=35 — Size of marker.
marker_opacity : float, default=0.7 — Opacity of marker.
marker_label : array-like, shape (n_samples,), default=None — Text to display on each point.
Returns
fig : plotly.graph_objects.Figure — PCA scores plot.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> fig = model.plot_pca_scores(pc=['PC1', 'PC2'])
>>> fig.show()
plot_loading_(self, pc: List[str]=['PC1', 'PC2'], fig_height: int=600, fig_width: int=1800, font_size: int=20, title_font_size: int=20, marker_size: int=1, x_axis_title: str='𝛿<sub>H</sub> in ppm', xaxis_direction: str='reversed')
Visualise PCA loadings
Parameters
pc : list, default=['PC1', 'PC2'] — Principal components to plot.
fig_height : int, default=600 — Height of figure.
fig_width : int, default=1800 — Width of figure.
Returns
fig : plotly.graph_objects.Figure — Plotly figure.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(60, 100))
>>> label = pd.Series(['A'] * 30 + ['B'] * 30)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> fig = model.plot_loading_(pc=['PC1', 'PC2'])
>>> fig.show()
----------
plot_pca_trajectory(self, time_: pd.Series, time_order: dict, stat_: List[str]=['mean', 'sem'], pc: List[str]=['PC1', 'PC2'], color_dict: Optional[dict]=None, symbol_dict: Optional[dict]=None, fig_height: int=900, fig_width: int=1300, marker_size: int=35, marker_opacity: float=0.7, title_font_size: int=20, font_size: int=20, legend_name: List[str]=['Group', 'Time point'])
Visualise PCA trajectory plot showing group centroids across time points.
Parameters
time_ : pd.Series — Time point labels for each sample.
time_order : dict — Dictionary mapping time point labels to their sort order.
stat_ : list, default=['mean', 'sem'] — Aggregation statistics [center ('mean' or 'median'), error ('sem' or 'std')].
pc : list, default=['PC1', 'PC2'] — List of two principal components to plot.
color_dict : dict, default=None — Dictionary mapping group labels to colours.
symbol_dict : dict, default=None — Dictionary mapping time point labels to marker symbols.
fig_height : int, default=900 — Height of figure.
fig_width : int, default=1300 — Width of figure.
marker_size : int, default=35 — Size of marker.
marker_opacity : float, default=0.7 — Opacity of marker.
title_font_size : int, default=20 — Font size for title.
font_size : int, default=20 — Font size for annotations.
legend_name : list, default=['Group', 'Time point'] — Legend labels for group and time point.
Returns
fig : plotly.graph_objects.Figure — PCA trajectory plot.
Examples:
>>> import numpy as np
>>> import pandas as pd
>>> from metbit.analysis.pca import pca
>>> X = pd.DataFrame(np.random.rand(80, 100))
>>> label = pd.Series(['A'] * 40 + ['B'] * 40)
>>> time = pd.Series(['T1', 'T2'] * 40)
>>> model = pca(X=X, label=label, n_components=2)
>>> model.fit()
>>> fig = model.plot_pca_trajectory(time_=time, time_order={'T1': 0, 'T2': 1})
>>> fig.show()