Zum Inhalt

clusters

build_clusters(images)

Build clusters of colours which are enclosed by a square-like shape. Square-like shapes are detected using Canny edge detection and the colours inside them are collected in HSV colour space. Then, k-means clustering is performed using pre-defined initial centers corresponding to the expected colours in the HSV colour space. We are looking for the colours present on the stickers of the Rubik's cube, namely red, blue, green, white, yellow and orange. Furthermore, we expect one more colour, black, which results from masking the image. This colour is only used as a bin for the colour (0,0,0), which is present in masked areas.

Parameters:

Name Type Description Default
images List[numpy.ndarray]

A list of numpy arrays (BGR images) with one side of the cube shown in each of the images. Six images are expected in total.

required

Returns:

Type Description
Tuple[sklearn.cluster._kmeans.KMeans, List[Tuple[int]]]

A tuple(A,B) where: A: The kmeans object returned by scikit-learn. Prediction of new colours can be done on this object. B: A list of bounding boxes for each of the images, containing all square-like contours.

Source code in core/cv/clusters.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def build_clusters(images: List[np.ndarray]) -> Tuple[KMeans, List[Tuple[int]]]:
    """
    Build clusters of colours which are enclosed by a square-like shape.

    Square-like shapes are detected using canny edge detection and the colours inside them are collected in hsv
    colour space. Then, kmeans clustering is performed using pre-defined initial centers corresponding to the expected
    colours in the hsv colour space. We are looking for the colours present on the stickers of the Rubik's cube, namely
        red, blue, green, white, yellow and orange.
    Furthermore, we expect one more colour, which is black as a result of masking the image. This colour is only used
    as a bin for the colour `(0,0,0)`, which is present in masked areas.

    When the module logger is enabled for DEBUG, the intermediate processing steps of every image are shown in an
    OpenCV window (blocking until a key is pressed) and the final clusters are shown in a matplotlib 3d scatter plot.

    Args:
        images: A list of numpy arrays (BGR images) with one side of the cube shown in each of the images.
            Six images are expected in total.

    Returns:
        A tuple(A,B) where:
        A: The kmeans object returned by scikit-learn. Prediction of new colours can be done on this object.
        B: A list with one bounding box `(x, y, width, height)` per image, enclosing all square-like contours.
           An image without any square-like contour yields an all-zero box.

    Raises:
        AssertionError: If the number of images does not match the number of cube faces, or if no square-like
            shape was found in any of the images.

    """
    assert len(images) == len(cube.FacePosition), f"Expected {len(cube.FacePosition)} face images but got: {len(images)}"

    # Evaluate the debug switch once so the plot objects created here are guaranteed to exist when they are
    # used again after clustering.
    debug = logger.isEnabledFor(logging.DEBUG)
    if debug:
        # plot clusters in hsv colour space
        import matplotlib.pyplot as plt
        fig = plt.figure()
        axis = fig.add_subplot(111, projection='3d')

    hsv_values = []
    color_values = []
    bounding_boxes = []
    for image in images:
        # `steps` collects the intermediate images for the debug view
        steps = [image.copy()]
        image = cv.bilateralFilter(image, 35, 50, 50)
        steps.append(image.copy())

        # extract edges using canny
        canny = cv.Canny(image, CANNY_TRESH, CANNY_TRESH * CANNY_RATIO)
        dilate_kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
        canny = cv.dilate(canny, dilate_kernel)

        # only keep square-like contours
        contours, _ = cv.findContours(canny, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
        image_height, image_width = image.shape[:2]
        contours = [
            contour for contour in contours
            if core.cv.is_square_contour(contour, image_width, image_height)
        ]

        contour_image = cv.cvtColor(canny, cv.COLOR_GRAY2BGR)

        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        # Contours are drawn one at a time on purpose: filling all of them in a single call (index -1) would fill
        # them as one shape, which can leave nested contours unfilled.
        for idx in range(len(contours)):
            cv.drawContours(mask, contours, idx, 255, cv.FILLED)
            cv.drawContours(contour_image, contours, idx, (0, 150, 250), 2)
        steps.append(contour_image)
        # shrink the mask a little so that pixels on the sticker borders are not sampled
        mask = cv.erode(mask, np.ones((5, 5), np.uint8))

        masked_image_bgr = cv.bitwise_and(image, image, mask=mask)
        rect = cv.boundingRect(mask)
        # a box is recorded for every image (even an empty one) so that the result stays aligned with `images`
        bounding_boxes.append(rect)
        if not any(rect):
            # empty mask -> nothing to sample from this image
            continue

        # convert image to hsv + resize image as to only use a few samples
        x, y, width, height = rect
        masked_image_bgr = masked_image_bgr[y:y + height, x:x + width]
        masked_image_bgr = cv.resize(masked_image_bgr, (0, 0), fx=0.1, fy=0.1, interpolation=cv.INTER_NEAREST)
        masked_image_rgb = cv.cvtColor(masked_image_bgr, cv.COLOR_BGR2RGB)
        masked_image_hsv = cv.cvtColor(masked_image_bgr, cv.COLOR_BGR2HSV)
        masked_image_hsv = masked_image_hsv.astype(float)

        # rgb colours scaled to [0, 1]; only used to colour the points of the debug scatter plot
        colors = masked_image_rgb.reshape((-1, 3)) / 255.0

        # convert to cylindrical hsv model: hue is an angle (stored as 0..180 by OpenCV for 8 bit images) and
        # saturation the radius, so that hues on both sides of the wrap-around end up close to each other
        hue = masked_image_hsv[:, :, 0].copy()
        sat = masked_image_hsv[:, :, 1].copy()
        masked_image_hsv[:, :, 0] = np.cos(hue / 180.0 * 2*np.pi) * sat
        masked_image_hsv[:, :, 1] = np.sin(hue / 180.0 * 2*np.pi) * sat
        hsv_values.append(masked_image_hsv.reshape((-1, 3)))
        color_values.append(colors)

        if debug:
            # debug images for cv: reduce saturation and value of everything outside of the mask
            highlighted_hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV).astype(float)
            highlighted_hsv_mask_view = np.ma.array(highlighted_hsv, mask=np.repeat(mask[..., None], 3, axis=2))
            highlighted_hsv_mask_view[:, :, 1] *= 0.2
            highlighted_hsv_mask_view[:, :, 2] *= 0.2
            highlighted_hsv = highlighted_hsv.astype(np.uint8)
            highlighted_bgr = cv.cvtColor(highlighted_hsv, cv.COLOR_HSV2BGR)
            steps.append(highlighted_bgr.copy())

            # 2x2 grid of the four processing steps
            comp = np.vstack((
                np.hstack(tuple(steps[:2])),
                np.hstack(tuple(steps[2:]))
            ))

            short_edge_length = min(image.shape[0], image.shape[1])
            min_edge_length = int(short_edge_length * core.cv.MIN_SIDE_LENGTH_RATIO)
            max_edge_length = int(short_edge_length * core.cv.MAX_SIDE_LENGTH_RATIO)

            # draw reference squares with the minimal and maximal edge length derived from the side length ratios
            offset = 20
            cv.rectangle(comp, (offset, offset), (offset + min_edge_length, offset + min_edge_length), (0, 250, 100), 4)
            cv.rectangle(comp, (offset, offset), (offset + max_edge_length, offset + max_edge_length), (0, 100, 250), 4)

            comp = cv.resize(comp, (0,0), fx=OUTPUT_SCALE, fy=OUTPUT_SCALE)
            cv.imshow("image", comp)
            cv.waitKey()

    assert len(color_values) > 0, "Found no square-like shapes."

    color_values = np.concatenate(color_values, axis=0).reshape((-1, 3))
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the equivalent dtype
    hsv_values = np.concatenate(hsv_values, axis=0).reshape((-1, 3)).astype(float)

    # do the actual clustering using pre-defined centers
    kmeans = KMeans(n_clusters=7, init=INITIAL_CENTERS, n_init=1)
    kmeans.fit(hsv_values)
    centers = kmeans.cluster_centers_

    if debug:
        h, s, v = hsv_values[:, 0], hsv_values[:, 1], hsv_values[:, 2]
        axis.scatter(xs=centers[:,0], ys=centers[:,1], zs=centers[:,2], marker='s', s=200)
        axis.scatter(xs=h.flatten(), ys=s.flatten(), zs=v.flatten(), c=color_values, s=1)
        axis.set_xlim(-255, 255)
        axis.set_ylim(-255, 255)
        axis.set_zlim(0, 255)
        for idx, center in enumerate(centers):
            label = LABEL_MAP[idx].to_string()
            axis.text(center[0], center[1], center[2], label)

        plt.show()
    return kmeans, bounding_boxes

kmeans_color_detection(images)

Converts a list of images showing faces of a Rubik's cube to a list of numpy arrays containing the respective TileColors using computer vision and kmeans clustering.

Parameters:

Name Type Description Default
images List[numpy.ndarray]

A list of numpy arrays (BGR images) with one side of the cube shown in each of the images. Six images are expected in total.

required

Returns:

Type Description
List[numpy.ndarray]

A list of 3x3 numpy arrays, one for each input image, with the respective TileColors of the cube in the input image.

Source code in core/cv/clusters.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def kmeans_color_detection(images: List[np.ndarray]) -> List[np.ndarray]:
    """
    Converts a list of images showing faces of a Rubik's cube to a list of numpy arrays containing the respective
    TileColors using computer vision and kmeans clustering.

    The clustering and one bounding box per image are obtained from `build_clusters`. Each bounding box is split into
    a 3x3 grid and the pixel in the middle of every grid cell is classified with the clustering.

    Args:
        images: A list of numpy arrays (BGR images) with one side of the cube shown in each of the images.
            Six images are expected in total.

    Returns:
        A list of 3x3 numpy arrays, one for each input image, with the respective TileColors of the cube in the input image.

    Raises:
        AssertionError: If the number of images does not match the number of cube faces or the number of
            bounding boxes returned by `build_clusters`.

    """
    assert len(images) == len(cube.FacePosition), f"Expected {len(cube.FacePosition)} face images but got: {len(images)}"

    kmeans, bounding_boxes = build_clusters(images)

    assert len(bounding_boxes) == len(images), f"Expected {len(images)} bounding boxes but got: {len(bounding_boxes)}"

    faces = []
    for image, bounding_box in zip(images, bounding_boxes):
        image_hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV).astype(float)

        # use the bounding box (x, y, width, height) to extract each of the colors in the center
        box_x, box_y, box_width, box_height = bounding_box[:4]
        segment_width, segment_height = box_width / 3.0, box_height / 3.0

        face = np.full((3, 3), dtype=cube.TileColor, fill_value=-1)
        for row in range(3):
            image_loc_y = int(box_y + (row + 0.5) * segment_height)
            for col in range(3):
                # pick colour in the middle of the segment and predict its label using the clustering
                image_loc_x = int(box_x + (col + 0.5) * segment_width)

                # Unpack into plain values instead of modifying the pixel in place: indexing the image returns a
                # view, and writing the transformed values back would transform the same pixel twice whenever two
                # sample points coincide (e.g. for a very small or empty bounding box).
                hue, sat, val = image_hsv[image_loc_y, image_loc_x]

                # same cylindrical hsv model as used when building the clusters
                picked_colour = [
                    np.cos(hue / 180.0 * 2*np.pi) * sat,
                    np.sin(hue / 180.0 * 2*np.pi) * sat,
                    val,
                ]

                label = kmeans.predict([picked_colour])[0]
                face[row, col] = LABEL_MAP[label]

        faces.append(face)

    return faces