给定一个由n个互异的关键字组成的有序序列K={k1<k2<k3<,……,<kn}和它们被查询的概率P={p1,p2,p3,……,pn},要求构造一棵二叉查找树T,使得查询所有元素的总的代价最小。对于一个搜索树,当搜索的元素在树内时,表示搜索成功。当不在树内时,表示搜索失败,用一个“虚叶子节点”来标示搜索失败的情况,因此需要n+1个虚叶子节点{d0<d1<……<dn},对于应di的概率序列是Q={q0,q1,……,qn}。其中d0表示搜索元素小于k1的失败结果,dn表示搜索元素大于kn的失败情况。di(0<i<n)表示搜索节点在ki和k(i+1)之间时的失败情况。因此有如下公式:
由每个关键字和每个虚拟键被搜索的概率,可以确定在一棵给定的二叉查找树T内一次搜索的期望代价。设一次搜索的实际代价为检查的节点个数,即在T内搜索所发现的节点的深度加上1。所以在T内一次搜索的期望代价为:
需要注意的是:一棵最优二叉查找树不一定是一棵整体高度最小的树,也不一定总是把最大概率的关键字放在根部。
动态规划求解过程
1)最优二叉查找树的结构
如果一棵最优二叉查找树T有一棵包含关键字ki,……,kj的子树T',那么这棵子树T’对于对于关键字ki,……kj和虚拟键di-1,……,dj的子问题也必定是最优的。
2)一个递归解
定义e[i,j]为搜索一棵包含关键字ki,……,kj的最优二叉查找树的期望代价,则分类讨论如下:
当j=i-1时,说明此时只有虚拟键di-1,故e[i,i-1] = qi-1
当j≥i时,需要从ki,……,kj中选择一个跟kr,然后用关键字ki,……,kr-1来构造一棵最优二叉查找树作为左子树,用关键字kr+1,……,kj来构造一棵最优二叉查找树作为右子树。定义一棵有关键字ki,……,kj的子树,定义概率的总和为:
因此如果kr是一棵包含关键字ki,……,kj的最优子树的根,则有:
故e[i,j]重写为:
最终的递归式如下:
#include<iostream> #include<vector> //#include<utility> //#include<algorithm> #include<iterator> using namespace std; pair<vector<vector<double>>, vector<vector<int>>> optimal_binary_search_tree(double *p,double *q,int n) { vector<vector<double>> e(n + 2, vector<double>(n + 1, 0)); vector<vector<int>> root(n + 1, vector<int>(n + 1, 0)); vector<vector<double>> w(n + 2, vector<double>(n + 1, 0)); for (int i = 1; i <= n + 1; ++i) { e[i][i - 1] = q[i - 1]; w[i][i - 1] = q[i - 1]; } for (int len = 1; len <= n; ++len) { for (int i = 1; i <= n-len + 1; ++i) { int j = i + len - 1; e[i][j] = 100.0; w[i][j] = w[i][j - 1] + p[j] + q[j]; for (int r = i; r <= j; ++r) { double t = e[i][r - 1] + e[r + 1][j] + w[i][j]; if (t < e[i][j]) { e[i][j] = t; root[i][j] = r; } } } } return make_pair(e, root); } void Construct_Optimal_BST(vector<vector<int>> root, int i, int j, bool flag) { if (flag == 0) { cout << "k" << root[i][j] << " 是根" << endl; flag = 1; } int r = root[i][j]; //如果左子树是叶子 if (r - 1<i) { cout << "d" << r - 1 << " is the left child of " << "K" << r << endl; } //如果左子树不是叶子 else { cout << "k" << root[i][r - 1] << " is the left child of " << "K" << r << endl; Construct_Optimal_BST(root, i, r - 1, 1); } //如果右子树是叶子 if (r >= j) { cout << "d" << j << " is the right child of " << "K" << r << endl; } //如果右子树不是叶子 else { cout << "k" << root[r + 1][j] << " is the right child of " << "K" << r << endl; Construct_Optimal_BST(root, r + 1, j, 1); } } void construct_optimal_bst2(vector<vector<int>> root, int i, int j) { int n = j - i + 1; if (i == 1 && j == n) cout << "k" << root[1][n] << "是根" << endl; if (i<j) { int r = root[i][j]; if (r != i) cout << "k" << root[i][r - 1] << "是k" << r << "的左孩子" << endl; construct_optimal_bst2(root, i, r - 1); if (r != j) cout << "k" << root[r + 1][j] << "是k" << r << "的右孩子" << endl; construct_optimal_bst2(root, r + 1, j); } if (i == j) { cout << "d" << i - 1 << "是k" << i << "左孩子" << endl; cout << "d" << i << "是k" << i << "右孩子" << endl; } if (i>j) cout << "d" << j << "是k" << j << "右孩子" << endl; } int main() { double p[6] = { 0, 0.15, 0.1, 0.05, 0.1, 0.2 }; double q[6] = { 0.05, 0.1, 0.05, 0.05, 0.05, 0.1 }; vector<vector<double>> e(5 + 2, vector<double>(5 + 1, 0)); vector<vector<int>> root(5 + 1, vector<int>(5 + 1, 0)); int n = sizeof(p) / sizeof(double); cout << n << endl; e = optimal_binary_search_tree(p, q, 5).first; root = optimal_binary_search_tree(p, q, 5).second; cout <<"最优二叉树的代价是:" <<e[1][5] << endl; cout << "各个子树的期望代价如下所示:" << endl; for (int i = 1; i <= 5 + 1; ++i) { copy(e[i].begin(), e[i].end(), ostream_iterator<double>(cout, " ")); cout << endl; } cout << "-----------------------------------------" << endl; for (int i = 1; i <= 5 + 1; ++i) { for (int j = i-1; j <= 5; ++j) { cout << e[i][j] << " "; } cout << endl; } cout << endl; cout << "各个子树根如下表所示:" << endl; for (int i = 1; i <= 5; ++i) { copy(root[i].begin(), root[i].end(), ostream_iterator<int>(cout, " ")); cout << endl; } cout << endl; cout << "--------------------------------------" << endl; for (int i = 1; i <= 5 ; ++i) { for (int j = i ; j <= 5; ++j) { cout << root[i][j] << " "; } cout << endl; } cout << "构造的最优二叉查找树如下所示:" << endl; Construct_Optimal_BST(root, 1, 5,0 ); cout << "--------------------------------------" << endl; construct_optimal_bst2(root, 1, 5); }