# 搜索

## 顺序搜索

In [None]:
# 无序列表的顺序搜索
def sequentialSearch(alist, item):
    """Return True if *item* occurs anywhere in the unordered list *alist*.

    Elements are compared one by one from the front using ``==``; the scan
    ends at the first match or when the list is exhausted.
    """
    for index in range(len(alist)):
        if alist[index] == item:
            return True
    return False

In [None]:
# 有序列表的顺序搜索
def orderedSequentialSearch(alist, item):
    """Return True if *item* occurs in *alist*, which must be sorted ascending.

    The scan walks from the front and gives up as soon as it meets an element
    greater than *item*, because every later element is at least as large.
    """
    for index in range(len(alist)):
        candidate = alist[index]
        if candidate == item:
            return True
        if candidate > item:
            # Passed the place where item would have been.
            return False
    return False

## 二分搜索

In [None]:
# O(logn)
def binarySearch(alist, item):
    """Return True if *item* occurs in the ascending-sorted list *alist*.

    Iterative binary search over the inclusive index window [low, high].
    """
    low, high = 0, len(alist) - 1

    # The comparison must be <= : when low == high there is still one
    # element left in the window that has to be examined.
    while low <= high:
        middle = (low + high) // 2
        probe = alist[middle]
        if probe == item:
            return True
        if item < probe:
            high = middle - 1
        else:
            low = middle + 1
    return False

In [None]:
# 可以用递归 分治的思想解决
def binarySearch1(alist, item):
    """Return True if *item* occurs in the ascending-sorted sequence *alist*.

    Recursive divide-and-conquer binary search. The recursion narrows a pair
    of index bounds instead of slicing the list, so no copies of the input
    are made and each level does constant work.
    """
    def search(first, last):
        # Search the half-open index window [first, last).
        if first >= last:
            return False
        midpoint = (first + last) // 2
        if alist[midpoint] == item:
            return True
        if item < alist[midpoint]:
            return search(first, midpoint)
        return search(midpoint + 1, last)

    return search(0, len(alist))

## 散列

In [None]:
# 为字符串创建简单的散列函数
def hash(astring, tablesize):
    """Return a simple position-weighted hash of *astring* in range(tablesize).

    Each character's ordinal value is multiplied by its 1-based position,
    the products are added up, and the total is reduced modulo *tablesize*.
    """
    weighted_total = sum(
        ord(char) * weight for weight, char in enumerate(astring, 1)
    )
    return weighted_total % tablesize

In [None]:
# Anagrams contain the same characters, so an unweighted character sum would
# give them the same hash value; weighting each character by its position
# (as hash() above does) lets them hash differently.
hash("tac", 11)
hash("cat", 11)

### 使用hashtable实现字典

In [32]:
class HashTable():
    """Fixed-size map built on open addressing with linear probing.

    Keys live in ``slots`` and their values in ``data`` at the same index.
    Keys must support ``key % size`` (e.g. integers). Deletion is not
    supported, which is what allows a lookup to stop at the first empty slot.
    """

    def __init__(self):
        self.size = 11
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def put(self, key, data):
        """Store *data* under *key*, overwriting the value of an existing key.

        Raises:
            RuntimeError: if every slot is occupied by other keys, so there
                is no room for a new key.
        """
        size = len(self.slots)
        position = self.hashfunction(key, size)

        # Probe at most `size` slots; without this bound a full table that
        # does not contain the key would be probed forever.
        for _ in range(size):
            occupant = self.slots[position]
            if occupant is None:
                # Free slot: insert the new key here.
                self.slots[position] = key
                self.data[position] = data
                return
            if occupant == key:
                # Key already present: just replace its value.
                self.data[position] = data
                return
            # Collision with a different key: move on to the next slot.
            position = self.rehash(position, size)

        raise RuntimeError("hash table is full")

    def hashfunction(self, key, size):
        """Return the home slot of *key*: the remainder of key / size."""
        return key % size

    def rehash(self, oldhash, size):
        """Return the slot probed after *oldhash* (next index, wrapping)."""
        return (oldhash + 1) % size

    def get(self, key):
        """Return the value stored under *key*, or None if it is absent."""
        size = len(self.slots)
        position = self.hashfunction(key, size)

        # An empty slot ends the search: insertion probes the same sequence,
        # so the key would have been placed no later than that slot. The
        # bounded loop covers the case where the probe wraps all the way
        # around a full table.
        for _ in range(size):
            occupant = self.slots[position]
            if occupant is None:
                return None
            if occupant == key:
                return self.data[position]
            position = self.rehash(position, size)
        return None

    # The index operator delegates to get and put: table[key] and
    # table[key] = value.
    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, data):
        self.put(key, data)

In [33]:
# test: fill a table through the index operator (__setitem__ -> put).
# The table has 11 slots, so keys such as 77, 44 and 55 share home slot 0
# and exercise the collision handling.
H = HashTable()
H[54] = "cat"
H[26] = "dog"
H[93] = "lion"
H[17] = "tiger"
H[77] = "bird"
H[31] = "cow"
H[44] = "goat"
H[55] = "pig"
H[20] = "chicken"

In [34]:
# Inspect the table's internal state: the key slots and the parallel value list.
H.slots
H.data

[77, 44, 55, 20, 26, 93, 17, None, None, 31, 54]

In [38]:
# Assigning to a key that is already stored replaces its value in place.
H[20] = "ducj"

# 排序

## 冒泡排序

In [2]:
# O(n^2)
def bubbleSort(alist):
    """Sort *alist* in place in ascending order using bubble sort.

    Returns None; the caller's list is modified directly.
    """
    # The first pass needs len(alist) - 1 comparisons; each later pass needs
    # one fewer because the largest remaining value has settled at the end.
    for last_unsorted in range(len(alist) - 1, 0, -1):
        for left in range(last_unsorted):
            right = left + 1
            if alist[left] > alist[right]:
                alist[left], alist[right] = alist[right], alist[left]

In [9]:
# 修改后的短冒泡排序: 如果某一轮没有发生交换, 说明列表已经有序, 可以提前结束
# 最好情况 O(n) (列表已经有序), 最坏情况 O(n^2)
def shortBubbleSort(alist):
    """Sort *alist* in place with bubble sort that stops early, and return it.

    If a full pass performs no exchange the list is already ordered, so the
    remaining passes are skipped.
    """
    last_unsorted = len(alist) - 1
    swapped = True
    while swapped and last_unsorted > 0:
        swapped = False
        for left in range(last_unsorted):
            right = left + 1
            if alist[left] > alist[right]:
                alist[left], alist[right] = alist[right], alist[left]
                swapped = True
        last_unsorted -= 1
    return alist

## 选择排序

In [11]:
# 在冒泡排序基础上的改进, 每轮只调换一次

In [13]:
# 交换次数减少了, 但是比较次数没变, 所以时间复杂度没变
def selectionSort(alist):
    """Sort *alist* in place in ascending order by selection sort; return it.

    Each pass locates the largest value in the still-unsorted prefix and
    swaps it into the last position of that prefix, so there is exactly one
    exchange per pass.
    """
    for fillslot in range(len(alist) - 1, 0, -1):
        # Index of the (first) largest element within alist[0..fillslot].
        largest_at = max(range(fillslot + 1), key=lambda idx: alist[idx])
        alist[fillslot], alist[largest_at] = alist[largest_at], alist[fillslot]
    return alist

## 插入排序

In [16]:
# 在较低的一端维护列表, 实现插入操作
# 如何维护当前的结构, 通过向右蠕动的方式
def insertionSort(alist):
    """Sort *alist* in place in ascending order by insertion sort; return it.

    The front of the list is kept sorted. Each following element is inserted
    into that sorted part by shifting larger elements one place to the right.
    """
    # Start at index 1: a single leading element is trivially sorted.
    for index in range(1, len(alist)):
        pending = alist[index]
        hole = index
        # Slide larger neighbours right until the spot for `pending` opens up.
        while hole > 0 and alist[hole - 1] > pending:
            alist[hole] = alist[hole - 1]
            hole -= 1
        alist[hole] = pending
    return alist

## 希尔排序

In [17]:
# 对插入排序做的改进, 将列表分成数个子列表, 每个子列表都使用插入排序
def shellSort(alist):
    """Sort *alist* in place with Shell sort, printing the list after each gap.

    The list is viewed as several interleaved sub-lists whose elements are
    `gap` positions apart; each sub-list is insertion-sorted, then the gap is
    halved until it reaches zero.
    """
    # The gap is both the distance between sub-list members and the number
    # of sub-lists.
    gap = len(alist) // 2
    while gap > 0:
        for offset in range(gap):
            gapInsertionSort(alist, offset, gap)
        print("After increments of size", gap, "The list is", alist)
        gap //= 2

# 子序列的插入排序
def gapInsertionSort(alist, start, gap):
    """Insertion-sort, in place, the sub-list start, start+gap, start+2*gap, ...

    Only the elements at those positions are compared and moved; all other
    positions of *alist* are left untouched.

    Args:
        alist: the list being sorted.
        start: index of the first element of the sub-list.
        gap: distance between consecutive sub-list elements.
    """
    # Step by `gap` so that only this sub-list's members are visited; stepping
    # by 1 would redo the work of every other sub-list on each call.
    for i in range(start + gap, len(alist), gap):
        currentvalue = alist[i]
        position = i
        # Shift larger sub-list members one gap to the right.
        while position >= gap and alist[position - gap] > currentvalue:
            alist[position] = alist[position - gap]
            position = position - gap
        alist[position] = currentvalue

## 归并排序

In [18]:
# 通过分治的思想完成排序

In [1]:
def mergeSort(alist):
    """Sort *alist* in place in ascending order using merge sort.

    The list is split in two, each half is sorted recursively, and the two
    sorted halves are merged back into *alist*. The state of the list is
    printed at the end of every call. Returns None.
    """
    if len(alist) > 1:
        mid = len(alist) // 2
        lefthalf = alist[:mid]
        righthalf = alist[mid:]

        # Sort each half recursively (a call, not a subscript).
        mergeSort(lefthalf)
        mergeSort(righthalf)

        # Two-pointer merge: i walks lefthalf, j walks righthalf, and k is
        # the next position to fill in alist.
        i = 0
        j = 0
        k = 0
        while i < len(lefthalf) and j < len(righthalf):
            # <= takes equal elements from the left half first, which keeps
            # the sort stable.
            if lefthalf[i] <= righthalf[j]:
                alist[k] = lefthalf[i]
                i += 1
            else:
                alist[k] = righthalf[j]
                j += 1
            k += 1

        # Copy whatever remains in either half; at most one loop runs.
        while i < len(lefthalf):
            alist[k] = lefthalf[i]
            i += 1
            k += 1

        while j < len(righthalf):
            alist[k] = righthalf[j]
            j += 1
            k += 1
    # A list cannot be concatenated to a str directly; convert it first.
    print("Merging " + str(alist))