In [20]:
import time
def mul1(size=10000000):
  """Time an element-wise product of two Python lists.

  Args:
    size: Number of elements in each operand list.

  Returns:
    Elapsed seconds (float) spent on the multiplication only; building the
    two input lists is excluded from the measurement.
  """
  list1, list2 = list(range(size)), list(range(size))
  # perf_counter is monotonic and high-resolution, unlike the wall clock
  # returned by time.time(), so short intervals are measured reliably.
  start = time.perf_counter()
  _ = [x * y for x, y in zip(list1, list2)]  # result is discarded; only the cost matters
  return time.perf_counter() - start

In [19]:
import numpy as np
def mul2(size=10000000):
  """Time an element-wise product of two NumPy arrays.

  Args:
    size: Number of elements in each operand array (built with np.arange).

  Returns:
    Elapsed seconds (float) spent on the multiplication only; creating the
    two input arrays is excluded from the measurement.
  """
  array1, array2 = np.arange(size), np.arange(size)
  # perf_counter is monotonic and high-resolution, unlike the wall clock
  # returned by time.time(), so short intervals are measured reliably.
  start = time.perf_counter()
  _ = array1 * array2  # result is discarded; only the cost matters
  return time.perf_counter() - start

In [21]:
# Time the pure-Python list multiplication using the default size.
mul1()

2.033611536026001

In [22]:
# Time the NumPy array multiplication using the default size.
mul2()

0.056636810302734375

In [17]:
# Inspect the type of the object that range(...) evaluates to.
type(range(1000))

range

In [25]:
def matrix_mul(a, b):
  """Multiply two matrices stored as nested sequences of rows.

  Args:
    a: Left operand; its column count is taken from its first row.
    b: Right operand; its column count is taken from its first row.

  Returns:
    A new list of lists holding the product, with len(a) rows and
    len(b[0]) columns.

  Raises:
    AssertionError: If the column count of ``a`` differs from the row
      count of ``b``.
  """
  n_rows, inner = len(a), len(a[0])
  b_rows, n_cols = len(b), len(b[0])
  assert inner == b_rows, '''Number of cols A must be equal to number of rows b'''

  product = []
  for i in range(n_rows):
    row = a[i]
    # Each output cell is the dot product of a row of `a` with a column of `b`.
    product.append([
        sum(row[k] * b[k][j] for k in range(inner))
        for j in range(n_cols)
    ])
  return product

In [26]:
# Example: a 2x3 matrix times a 3x2 matrix, multiplied with the pure-Python helper.
A = [[1, 2, 3], [4, 5, 6]]
B = [[1, 2], [3, 4], [5, 6]]
matrix_mul(A, B)

[[22, 28], [49, 64]]

In [27]:
# Same operands converted to NumPy arrays and multiplied with np.dot.
a = np.array(A)
b = np.array(B)
np.dot(a, b)

array([[22, 28],
       [49, 64]])

In [28]:
def computeIOU(boxA, boxB):
  """Compute the intersection-over-union of two axis-aligned boxes.

  Each box is indexed as [x_min, y_min, x_max, y_max] (judging by how the
  indices are paired below). Widths and heights are computed as
  ``max - min + 1``, i.e. both end coordinates are counted.

  Args:
    boxA: First box.
    boxB: Second box.

  Returns:
    Intersection area divided by union area, as a float.
  """
  def _area(box):
    # Area of one box using the same "+ 1" extent convention as the overlap.
    return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

  # Corners of the overlapping rectangle (may be inverted when disjoint).
  left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
  right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

  # Clamp each extent at 0 so disjoint boxes yield an empty intersection.
  overlap_w = max(0, right - left + 1)
  overlap_h = max(0, bottom - top + 1)
  intersection = overlap_w * overlap_h

  union = _area(boxA) + _area(boxB) - intersection
  return intersection / float(union)

In [29]:
# Example: two equally sized boxes, the second shifted by 50 along both axes.
boxA = [0, 0, 100, 100]
boxB = [50, 50, 150, 150]
computeIOU(boxA, boxB)

0.14611538677602381

In [31]:
def non_max_suppression(boxes, scores, threshold):
  """Greedy non-maximum suppression over scored boxes.

  Repeatedly keeps the highest-scoring remaining box and discards every other
  remaining box whose IoU with it (as computed by ``computeIOU``) exceeds
  ``threshold``.

  Args:
    boxes: Sequence of boxes, indexable by the same positions as ``scores``.
    scores: Sequence of scores, one per box.
    threshold: IoU value above which a box is suppressed.

  Returns:
    List of indices of the kept boxes, in descending score order.
  """
  remaining = sorted(range(len(scores)), key=lambda idx: scores[idx], reverse=True)
  keep_indices = []
  while remaining:
    max_index = remaining[0]
    keep_indices.append(max_index)
    # Build a fresh list of survivors rather than removing from the list being
    # iterated: removing during iteration skips the element that follows each
    # removed one, so some overlapping boxes would never be compared.
    remaining = [
        idx for idx in remaining[1:]
        if not (computeIOU(boxes[max_index], boxes[idx]) > threshold)
    ]
  return keep_indices

In [37]:
# One-hot encode a small categorical dataset.
dataset = np.array(['Ho Chi Minh',
                    'Da Nang', 'Ha Noi',
                    'Can Tho', 'Ha Noi'])
# Sorted distinct categories fix the column order of the encoding.
unique_cat = sorted(set(dataset))
category_to_index = {
    category: index for index, category in enumerate(unique_cat)
}
# Build the identity matrix once; row i is the one-hot vector for category i.
identity = np.eye(len(unique_cat))
one_hot_features = []
for data in dataset:
  # Copy the row so every feature vector is an independent array.
  one_hot_features.append(identity[category_to_index[data]].copy())

one_hot_features, dataset

([array([0., 0., 0., 1.]),
  array([0., 1., 0., 0.]),
  array([0., 0., 1., 0.]),
  array([1., 0., 0., 0.]),
  array([0., 0., 1., 0.])],
 array(['Ho Chi Minh', 'Da Nang', 'Ha Noi', 'Can Tho', 'Ha Noi'],
       dtype='<U11'))

In [38]:
class Node:
  """Binary tree node that stores one point and two optional children."""

  def __init__(self, point, left=None, right=None):
    # point: the value stored at this node; left/right: child nodes or None.
    self.point, self.left, self.right = point, left, right

In [47]:
def build_kd_tree(points, depth=0):
  """Recursively build a k-d tree from a sequence of points.

  At each level the points are ordered along one axis (cycling through the
  axes with depth); the element at index ``len // 2`` becomes the node and the
  elements before/after it form the left/right subtrees. The chosen axis,
  median index and point are printed at every level as a construction trace.

  Args:
    points: Sequence of equal-length coordinate tuples. It is not modified.
    depth: Current recursion depth; selects the splitting axis.

  Returns:
    The root ``Node`` of the (sub)tree, or None when ``points`` is empty.
  """
  if not points:
    return None

  k = len(points[0])
  axis = depth % k

  print(f"axis: {axis}")
  # Sort a copy: an in-place sort would silently reorder the caller's list.
  ordered = sorted(points, key=lambda x: x[axis])
  median = len(ordered) // 2
  print(f"median: {median}")
  print(f"point: {ordered[median]}")
  print("_______________________________")
  return Node(
      point=ordered[median],
      left=build_kd_tree(ordered[:median], depth + 1),
      right=build_kd_tree(ordered[median + 1:], depth + 1)
  )

In [48]:
# Build a tree from six sample 2-D points; the call prints its construction trace.
points = [(2, 3), (5, 4), (9, 6), (4, 7), (8, 1), (7, 2)]
root = build_kd_tree(points)

axis: 0
median: 3
point: (7, 2)
_______________________________
axis: 1
median: 1
point: (5, 4)
_______________________________
axis: 0
median: 0
point: (2, 3)
_______________________________
axis: 0
median: 0
point: (4, 7)
_______________________________
axis: 1
median: 1
point: (9, 6)
_______________________________
axis: 0
median: 0
point: (8, 1)
_______________________________


In [66]:
def square_distance(a, b):
  """Return the Euclidean distance between two coordinate sequences.

  Note: despite the function's name, the sum of squared differences is
  square-rooted before being returned, so the result is NOT squared.
  Coordinates are paired with zip, so extra trailing coordinates of the
  longer sequence are ignored.
  """
  total = 0
  for x, y in zip(a, b):
    total += (x - y) ** 2
  return total ** 0.5

In [67]:
def closer_point(new_data, nearest_node, root_node):
  """Pick whichever of two candidates is closer to ``new_data``.

  Args:
    new_data: The query coordinates.
    nearest_node: Current best candidate (may be falsy, e.g. None).
    root_node: Challenger candidate (may be falsy, e.g. None).

  Returns:
    ``root_node`` if ``nearest_node`` is falsy; ``nearest_node`` if
    ``root_node`` is falsy; otherwise ``root_node`` only when it is strictly
    closer to ``new_data`` per ``square_distance``, else ``nearest_node``.
  """
  if nearest_node and root_node:
    root_is_closer = (
        square_distance(new_data, root_node) < square_distance(new_data, nearest_node)
    )
    return root_node if root_is_closer else nearest_node
  # At least one candidate is missing: fall back to the other one.
  return nearest_node if nearest_node else root_node

In [70]:
def nearest_neighbor(node, point, depth=0, best=None):
  """Find the stored point closest to ``point`` in the tree rooted at ``node``.

  Args:
    node: Root ``Node`` of the (sub)tree to search, or None.
    point: Query coordinates.
    depth: Depth of ``node``; selects the splitting axis (depth % len(point)).
    best: Best candidate found so far, or None.

  Returns:
    The closest point found, or ``best`` unchanged when ``node`` is None.
  """
  if not node:
    return best

  axis = depth % len(point)

  # Descend first into the side of the splitting plane that contains the query.
  if point[axis] < node.point[axis]:
    next_branch, opposite_branch = node.left, node.right
  else:
    next_branch, opposite_branch = node.right, node.left
  best = closer_point(point, nearest_neighbor(next_branch, point, depth + 1, best), node.point)

  # The far side can only hold a closer point if the splitting plane itself is
  # closer than the current best. Measure the gap to the plane with the same
  # helper used for `best` (via the query's projection onto the plane) so both
  # sides of the comparison are in the same units; comparing a squared gap to
  # the helper's square-rooted result would prune branches incorrectly.
  plane_point = tuple(
      node.point[axis] if i == axis else coord
      for i, coord in enumerate(point)
  )
  if square_distance(point, plane_point) < square_distance(point, best):
    best = closer_point(point, nearest_neighbor(opposite_branch, point, depth + 1, best), node.point)
  return best


In [71]:
# Query the tree built above for the stored point closest to (6, 3.4).
point = (6, 3.4)
nearest_neighbor(root, point)

(5, 4)