k-NN
Definition
- Definition
- Explanation
- Guidance
- Tips
The k-Nearest Neighbors (k-NN) algorithm is a simple yet powerful supervised machine learning algorithm used for classification and regression tasks. It works by finding the k closest data points in the training set to a given input data point and then making predictions based on the labels or values of those neighboring points.
Select the number of neighbors (k) and specify a distance metric. Then, given an input data point, identify the k nearest neighbors using the chosen distance metric. For classification purposes, ascertain the majority class within the k neighbors and designate it as the predicted class for the input point. In regression tasks, compute the average (or weighted average) of the target values of the k neighbors, assigning it as the predicted value. Finally, return the predicted class or value based on the task at hand.
- Choose the value of k, which determines how many neighbors will be considered for prediction
- Select a distance metric such as Euclidean distance or Manhattan distance
- For each data point in the dataset
- calculate the distance between the input data point and all other data points using the chosen distance metric
- sort the distances in ascending order and select the k nearest neighbors
- For classification
- determine the majority class among the k neighbors
- assign this class as the predicted class for the input data point
- For regression
- calculate the average (or weighted average) of the target values of the k neighbors
- assign this value as the predicted value for the input data point
- choose an appropriate value of k based on the dataset size and complexity. A smaller k may lead to overfitting, while a larger k may lead to underfitting
- experiment with different distance metrics to find the one that best fits the data distribution
- scaling the features can improve the performance of the algorithm, especially when using distance-based metrics
- consider using weighted voting for classification, where closer neighbors have a higher influence on the prediction
Practice
- Practice
- Solution
// Predicts a class label (task_type == "classification") or a numeric
// value (task_type == "regression") for input_point from its k nearest
// neighbors in training_data. Any other task_type returns nothing.
knn_predict(input_point, training_data, k, task_type):
// Step 1: Calculate distances from input_point to every training point
distances = []
for point in training_data:
distance = calculate_distance(input_point, point)
distances.append((point, distance))
// Step 2: Sort distances (ascending, nearest first)
sorted_distances = sort(distances, by=distance_value)
// Step 3: Select k nearest neighbors
nearest_neighbors = sorted_distances[:k]
// Step 4: Perform prediction based on task type
if task_type == "classification":
predicted_class = majority_vote(nearest_neighbors)
return predicted_class
elif task_type == "regression":
predicted_value = calculate_average(nearest_neighbors)
return predicted_value
// Pluggable distance metric between two points.
calculate_distance(point1, point2):
// Implementation of distance calculation (e.g., Euclidean, Manhattan)
// Returns the distance value
// Sort key: extracts the distance component of a (point, distance) pair.
distance_value(element):
// Returns the distance value of the element (used for sorting)
// Returns the most frequent class among the neighbors.
// Ties are broken by whichever class reaches the highest count first
// during iteration (i.e., first-seen order).
majority_vote(neighbors):
// Step 5a: Count occurrences of each class
class_counts = {}
for neighbor in neighbors:
// Each neighbor is a (point, distance) pair; the class lives on the point.
neighbor_class = neighbor[0].class
if neighbor_class in class_counts:
class_counts[neighbor_class] += 1
else:
class_counts[neighbor_class] = 1
// Step 5b: Find the class with the highest count
max_count = 0
majority_class = None
for cls, count in class_counts.items():
if count > max_count:
max_count = count
majority_class = cls
return majority_class
// Returns the mean of the neighbors' target values (regression).
// Assumes neighbors is non-empty; an empty list would divide by zero.
calculate_average(neighbors):
// Step 5: Calculate the average (or weighted average) of target values
total_value = 0
for neighbor in neighbors:
total_value += neighbor[0].value // Assuming the value is stored in neighbor[0]
average_value = total_value / len(neighbors)
return average_value
package main
import (
"math"
"sort"
)
// Point is a 2-D coordinate used as a k-NN sample.
type Point struct {
X float64
Y float64
}
// ByDistance implements sort.Interface, ordering Points by their
// Euclidean distance to Target (nearest first).
type ByDistance struct {
	Points []Point
	Target Point
}

// Len reports the number of candidate points.
func (bd ByDistance) Len() int {
	return len(bd.Points)
}

// Swap exchanges the points at indices i and j in place.
func (bd ByDistance) Swap(i, j int) {
	bd.Points[i], bd.Points[j] = bd.Points[j], bd.Points[i]
}

// Less reports whether point i is closer to Target than point j.
// math.Hypot replaces the original Sqrt(Pow(..,2)+Pow(..,2)) form:
// it is shorter and avoids overflow/underflow in the squared terms.
func (bd ByDistance) Less(i, j int) bool {
	di := math.Hypot(bd.Points[i].X-bd.Target.X, bd.Points[i].Y-bd.Target.Y)
	dj := math.Hypot(bd.Points[j].X-bd.Target.X, bd.Points[j].Y-bd.Target.Y)
	return di < dj
}
// KNN returns the k points in data closest to target by Euclidean
// distance. k is clamped to [0, len(data)], so out-of-range values no
// longer panic as the original slice expression did. Note: data is
// sorted in place, matching the original behavior.
func KNN(data []Point, target Point, k int) []Point {
	byDist := ByDistance{Points: data, Target: target}
	sort.Sort(byDist)
	if k > len(data) {
		k = len(data)
	}
	if k < 0 {
		k = 0
	}
	return byDist.Points[:k]
}
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/** A 2-D point used as a k-NN sample. */
class Point {
// Coordinates are package-private and mutable, per the original design.
double x, y;
Point(double x, double y) {
this.x = x;
this.y = y;
}
}
public class KNN {
    /**
     * Returns the {@code k} points in {@code data} closest to {@code target}
     * (Euclidean distance).
     *
     * Fixes vs. the original: {@code k} is clamped to [0, data.size()] so an
     * out-of-range value no longer throws IndexOutOfBoundsException, and the
     * result is copied into a new ArrayList instead of returning a subList
     * view that aliases (and is invalidated by changes to) the caller's list.
     * The input list is still sorted in place, as before.
     */
    static List<Point> KNN(List<Point> data, Point target, int k) {
        data.sort(new ByDistance(target));
        int limit = Math.min(Math.max(k, 0), data.size());
        return new ArrayList<>(data.subList(0, limit));
    }

    /** Orders points by their Euclidean distance to a fixed target. */
    static class ByDistance implements Comparator<Point> {
        Point target;

        ByDistance(Point target) {
            this.target = target;
        }

        /** Euclidean distance from p to the target; hypot avoids overflow. */
        double distance(Point p) {
            return Math.hypot(p.x - target.x, p.y - target.y);
        }

        public int compare(Point p1, Point p2) {
            return Double.compare(distance(p1), distance(p2));
        }
    }
}
/** A 2-D point with numeric x and y coordinates. */
class Point {
  constructor(x, y) {
    // Assign both coordinates as own enumerable properties.
    Object.assign(this, { x, y });
  }
}
/**
 * Euclidean distance between two points with numeric x/y properties.
 * Math.hypot replaces the manual sqrt(dx^2 + dy^2): it is clearer and
 * robust against overflow/underflow in the squared intermediates.
 */
function distance(p1, p2) {
  return Math.hypot(p1.x - p2.x, p1.y - p2.y);
}
/**
 * Returns the k points in data closest to target (Euclidean distance).
 * Fix vs. the original: sorts a shallow copy, so the caller's array is
 * no longer reordered as a side effect. slice() already tolerates
 * k > data.length, returning the whole sorted copy.
 */
function KNN(data, target, k) {
  return [...data]
    .sort((a, b) => distance(a, target) - distance(b, target))
    .slice(0, k);
}
/** An immutable 2-D point used as a k-NN sample. */
data class Point(val x: Double, val y: Double)
class KNN {
    companion object {
        /**
         * Returns the [k] points of [data] nearest to [target] by Euclidean
         * distance, closest first. [take] tolerates k larger than the list
         * size, and [sortedBy] leaves the caller's list untouched.
         */
        fun knn(data: List<Point>, target: Point, k: Int): List<Point> {
            return data.sortedBy { distance(it, target) }.take(k)
        }

        /**
         * Euclidean distance between [p1] and [p2]. Plain multiplication
         * replaces `Math.pow(x, 2.0)`, which is slower and less idiomatic
         * for squaring.
         */
        private fun distance(p1: Point, p2: Point): Double {
            val dx = p1.x - p2.x
            val dy = p1.y - p2.y
            return Math.sqrt(dx * dx + dy * dy)
        }
    }
}
import math
class Point:
    """A 2-D point with ``x`` and ``y`` coordinates."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
def distance(p1, p2):
    """Euclidean distance between two objects exposing ``.x`` and ``.y``.

    ``math.hypot`` replaces the manual sqrt-of-squares form: it is clearer
    and robust against overflow/underflow in the squared intermediates.
    """
    return math.hypot(p1.x - p2.x, p1.y - p2.y)
def KNN(data, target, k):
    """Return the k points in ``data`` nearest to ``target``, closest first."""
    ranked = sorted(data, key=lambda point: distance(point, target))
    return ranked[:k]
/// A 2-D point used as a k-NN sample.
///
/// `Clone` is derived because `knn` copies the input slice via
/// `data.to_vec()`, which requires `Point: Clone`; without it the
/// original file did not compile.
#[derive(Debug, Clone)]
struct Point {
    x: f64,
    y: f64,
}
/// Euclidean distance between two points.
///
/// `f64::hypot` replaces the manual `sqrt(dx² + dy²)`: it is shorter
/// and avoids overflow/underflow in the squared intermediates.
fn distance(p1: &Point, p2: &Point) -> f64 {
    (p1.x - p2.x).hypot(p1.y - p2.y)
}
/// Returns the `k` points in `data` closest to `target` (Euclidean),
/// nearest first.
///
/// Fixes vs. the original:
/// * `truncate(k)` tolerates `k > data.len()` instead of panicking on
///   the out-of-range slice `sorted_data[..k]`.
/// * `f64::total_cmp` provides a total order, so NaN distances no
///   longer panic through `partial_cmp(..).unwrap()`.
fn knn(data: &[Point], target: &Point, k: usize) -> Vec<Point> {
    let mut ranked = data.to_vec();
    ranked.sort_by(|a, b| distance(a, target).total_cmp(&distance(b, target)));
    ranked.truncate(k);
    ranked
}
/** A 2-D point with numeric x and y coordinates. */
class Point {
constructor(
public x: number,
public y: number,
) {}
}
/**
 * Euclidean distance between two points.
 * Math.hypot replaces the manual sqrt(dx^2 + dy^2): it is clearer and
 * robust against overflow/underflow in the squared intermediates.
 */
function distance(p1: Point, p2: Point): number {
  return Math.hypot(p1.x - p2.x, p1.y - p2.y);
}
/**
 * Returns the k points in data closest to target (Euclidean distance).
 * Fix vs. the original: sorts a shallow copy, so the caller's array is
 * no longer reordered as a side effect. slice() already tolerates
 * k > data.length, returning the whole sorted copy.
 */
function KNN(data: Point[], target: Point, k: number): Point[] {
  return [...data]
    .sort((a, b) => distance(a, target) - distance(b, target))
    .slice(0, k);
}