标签:
In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}; both compress the string into 14 bits. Another set of codes can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2 <= N <= 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (<=1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a string of '0's and '1's.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
这道题我没有构造哈夫曼树出来。我的想法是,只需要算出哈夫曼的WPL还有看每个节点是否是叶子节点就可以了。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * One entry of the frequency min-heap used to compute the Huffman cost.
 * The heap routines in this file order entries by `freq` only.
 */
typedef struct Ele
{
int freq;   /* frequency of the symbol, or the sum of two merged entries */
char ch;    /* the symbol; main() stores -1 here for merged (internal) entries */
int lchild; /* not referenced by the code in this file */
int rchild; /* not referenced by the code in this file */
} ELE;
/* Min-heap operations over a 1-based array q[1..n], keyed on ELE.freq. */
/* Turn q[1..n] into a min-heap in place. */
void CreateQue(ELE *q, int n);
/* Sift the entry at index i down until the heap order holds within q[1..n]. */
void PercolatDown(ELE *q, int n, int i);
/* Remove and return the entry with the smallest freq; *n is decremented. */
ELE DeleteMin(ELE *q, int *n);
/* Add X to the heap; *n is incremented. */
void Insert(ELE *q, ELE X, int *n);
enum
{
    MAX_SYMBOLS = 63,   /* upper bound on N from the problem statement */
    MAX_CODE_LEN = 63,  /* longest codeword stored; longer ones are rejected */
    /* Each accepted codeword adds at most MAX_CODE_LEN nodes, plus the root. */
    MAX_TRIE_NODES = MAX_SYMBOLS * MAX_CODE_LEN + 1
};

/*
 * Insert one codeword into a binary trie whose root is node 0.
 *
 * child[node][bit] holds the index of the child node, or 0 when absent
 * (node 0 is the root and is never anyone's child). terminal[node] is
 * non-zero when a previously inserted codeword ends at that node.
 * *nodeCount is the number of nodes allocated so far.
 *
 * Any character other than '0' is treated as a 1 bit.
 *
 * Returns 1 when the codeword was added without conflict, and 0 when it is
 * empty, equal to an earlier codeword, a prefix of one, or has one as a
 * prefix.
 */
static int TrieInsert(int child[][2], char *terminal, int *nodeCount,
                      const char *code)
{
    int node = 0;
    int created = 0;
    size_t k;

    for (k = 0; code[k] != '\0'; k++)
    {
        int bit = (code[k] != '0');

        /* An earlier codeword ends here, so it is a prefix of this one. */
        if (terminal[node])
        {
            return 0;
        }
        if (child[node][bit] == 0)
        {
            child[node][bit] = (*nodeCount)++;
            created = 1;
        }
        node = child[node][bit];
    }

    /*
     * If no node had to be created, the final node already existed: this
     * codeword is a duplicate or a prefix of an earlier one (or is empty).
     */
    if (!created)
    {
        return 0;
    }
    terminal[node] = 1;
    return 1;
}

/*
 * Reads the symbol frequencies, computes the optimal (Huffman) total encoded
 * length with a min-heap, then reads M submissions and prints "Yes" for each
 * one that is prefix-free and reaches the optimal length, "No" otherwise.
 *
 * Returns 0 on success and 1 when the input is malformed or ends early.
 */
int main(void)
{
    int N, M;
    int heapSize;
    int optimalCost = 0;
    int i, j;
    ELE queue[MAX_SYMBOLS + 1] = { 0 }; /* 1-based heap storage */
    int freqOf[256] = { 0 };            /* frequency by character code */
    char known[256] = { 0 };            /* 1 if the character was declared */
    static int child[MAX_TRIE_NODES][2];
    static char terminal[MAX_TRIE_NODES];

    if (scanf("%d", &N) != 1 || N < 2 || N > MAX_SYMBOLS)
    {
        fprintf(stderr, "invalid symbol count\n");
        return 1;
    }

    for (i = 1; i <= N; i++)
    {
        char ch;
        int freq;

        /* The leading space skips any whitespace, including newlines. */
        if (scanf(" %c %d", &ch, &freq) != 2)
        {
            fprintf(stderr, "invalid frequency entry\n");
            return 1;
        }
        queue[i].ch = ch;
        queue[i].freq = freq;
        freqOf[(unsigned char)ch] = freq;
        known[(unsigned char)ch] = 1;
    }

    /*
     * The optimal total length equals the sum of all merged weights produced
     * by the Huffman procedure, so the tree itself never has to be built.
     */
    heapSize = N;
    CreateQue(queue, heapSize);
    while (heapSize > 1)
    {
        ELE first = DeleteMin(queue, &heapSize);
        ELE second = DeleteMin(queue, &heapSize);
        ELE merged = { 0 };

        merged.freq = first.freq + second.freq;
        merged.ch = -1;
        optimalCost += merged.freq;
        Insert(queue, merged, &heapSize);
    }

    if (scanf("%d", &M) != 1)
    {
        fprintf(stderr, "invalid submission count\n");
        return 1;
    }

    for (i = 0; i < M; i++)
    {
        char seen[256] = { 0 };
        int nodeCount = 1; /* node 0 is the root */
        int valid = 1;
        int cost = 0;

        memset(child, 0, sizeof child);
        memset(terminal, 0, sizeof terminal);

        for (j = 0; j < N; j++)
        {
            char ch;
            char code[MAX_CODE_LEN + 1];
            unsigned char idx;
            int overlong = 0;
            int c;

            /* The width 63 must stay equal to MAX_CODE_LEN. */
            if (scanf(" %c %63s", &ch, code) != 2)
            {
                fprintf(stderr, "invalid submission entry\n");
                return 1;
            }

            /*
             * Consume whatever is left of a codeword that did not fit in the
             * buffer. The loop stops at EOF, so it cannot spin forever.
             */
            while ((c = getchar()) != EOF && c != ' ' && c != '\t'
                   && c != '\r' && c != '\n')
            {
                overlong = 1;
            }

            /* Keep reading the rest of a rejected submission to stay in sync. */
            if (!valid)
            {
                continue;
            }

            idx = (unsigned char)ch;
            /*
             * Assuming positive frequencies, an optimal code for at most 63
             * symbols has no codeword longer than 62 bits, so an overlong
             * codeword can be rejected outright. Unknown or repeated
             * characters are rejected too.
             */
            if (overlong || !known[idx] || seen[idx])
            {
                valid = 0;
                continue;
            }
            seen[idx] = 1;

            cost += (int)strlen(code) * freqOf[idx];
            if (!TrieInsert(child, terminal, &nodeCount, code))
            {
                valid = 0;
            }
        }

        if (valid && cost == optimalCost)
        {
            printf("Yes\n");
        }
        else
        {
            printf("No\n");
        }
    }
    return 0;
}
/*
 * Arrange q[1..n] into a min-heap keyed on freq by sifting down every
 * entry that has at least one child, from the last such entry to the root.
 */
void CreateQue(ELE *q, int n)
{
    int parent = n / 2;

    while (parent > 0)
    {
        PercolatDown(q, n, parent);
        parent--;
    }
}
/*
 * Sift the entry at index i down the 1-based heap q[1..n]: while it has a
 * child with a strictly smaller freq, swap it with its smaller child.
 */
void PercolatDown(ELE *q, int n, int i)
{
    int hole = i;

    while (hole * 2 <= n)
    {
        int smallest = hole * 2;
        ELE swapped;

        /* Prefer the right child only when it exists and is strictly smaller. */
        if (smallest < n && q[smallest + 1].freq < q[smallest].freq)
        {
            smallest += 1;
        }

        /* Heap order already holds at this position. */
        if (q[hole].freq <= q[smallest].freq)
        {
            return;
        }

        swapped = q[hole];
        q[hole] = q[smallest];
        q[smallest] = swapped;
        hole = smallest;
    }
}
/*
 * Remove the root of the 1-based min-heap q[1..*n] and return it.
 * The last entry is moved to the root and sifted down, and *n is reduced
 * by one. The caller must ensure the heap is not empty.
 */
ELE DeleteMin(ELE *q, int *n)
{
    ELE smallest = q[1];
    int remaining = *n - 1;

    q[1] = q[*n];
    PercolatDown(q, remaining, 1);
    *n = remaining;
    return smallest;
}
/*
 * Add X to the 1-based min-heap q[1..*n] and increment *n.
 * Starting from the new last slot, parents whose freq is greater than or
 * equal to X.freq are moved down until the right position for X is found.
 * The caller must ensure q has room for index *n + 1.
 */
void Insert(ELE *q, ELE X, int *n)
{
    int slot = *n + 1;

    while (slot > 1 && q[slot / 2].freq >= X.freq)
    {
        q[slot] = q[slot / 2];
        slot /= 2;
    }
    q[slot] = X;
    *n += 1;
}
标签:
原文地址:http://www.cnblogs.com/Amin000/p/4773371.html