First we get the RGB data set:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from skimage import io
from sklearn.cluster import KMeans
# Load the photo from Wikimedia Commons into an array.
photo = io.imread("https://upload.wikimedia.org/wikipedia/commons/thumb/9/97/Swallow-tailed_bee-eater_%28Merops_hirundineus_chrysolaimus%29.jpg/768px-Swallow-tailed_bee-eater_%28Merops_hirundineus_chrysolaimus%29.jpg")
photo = np.array(photo, dtype=np.float64) / 255  # Scale values

# Remember the image's shape and unpack its three dimensions.
# NOTE(review): assumes a 3-D array with three colour channels - confirm.
w, h, d = original_shape = tuple(photo.shape)  # Get the current shape

# Flatten the two spatial axes so each row is one pixel.
image_array = np.reshape(photo, (w * h, d))  # Reshape to 2D
pixels = DataFrame(image_array, columns=["Red", "Green", "Blue"])

# Work on a random 5% of the pixels to keep clustering fast.
pixels_sample = pixels.sample(frac=0.05)
Then we compute the L*a*b* data set:
from skimage.color import rgb2lab
# Convert the scaled RGB photo to L*a*b*.
photo_lab = rgb2lab(photo)  # This is where we convert colour space

# Same flattening as for the RGB data: one row per pixel.
w, h, d = original_shape = tuple(photo_lab.shape)
image_array_lab = np.reshape(photo_lab, (w * h, d))
pixels_lab = DataFrame(image_array_lab, columns=["L", "a", "b"])

# Random 5% sample, drawn independently of any other sample of the image.
pixels_sample_lab = pixels_lab.sample(frac=0.05)
Then we normalise the two inertia values so we can compare them alongside each other:
# Fit a single-cluster model to each sample; the two inertia values give a
# baseline for comparing the two colour spaces.
kmeans = KMeans(n_clusters=1, n_init="auto").fit(
    pixels_sample[["Red", "Green", "Blue"]]
)
kmeans_lab = KMeans(n_clusters=1, n_init="auto").fit(
    pixels_sample_lab[["L", "a", "b"]]
)

# Ratio of the L*a*b* baseline to the RGB baseline; dividing an L*a*b*
# inertia by this factor expresses it relative to the RGB baseline.
norm = kmeans_lab.inertia_ / kmeans.inertia_
Then we loop over the number of clusters and calculate the inertia of each:
%matplotlib inline
# Collect one (RGB inertia, normalised L*a*b* inertia) pair per cluster count.
inertia_values = []
r = pd.RangeIndex(2, 10)  # Cluster counts 2 through 9
for n_clusters in r:
    kmeans = KMeans(n_clusters=n_clusters, n_init="auto").fit(
        pixels_sample[["Red", "Green", "Blue"]]
    )
    kmeans_lab = KMeans(n_clusters=n_clusters, n_init="auto").fit(
        pixels_sample_lab[["L", "a", "b"]]
    )
    # Divide the L*a*b* inertia by the normalisation factor so both columns
    # can be plotted on the same axis.
    inertia_values.append((kmeans.inertia_, kmeans_lab.inertia_ / norm))

# One row per cluster count, one column per colour space.
inertia = DataFrame(inertia_values, columns=["RGB", "L*a*b*"], index=r)
inertia.plot()
<Axes: >