winRAR真难用,我决定自创一个(筑基期)适合无基础
筑基期简介
筑基期主要阐述的是对于压缩和解压所需要的原理,主要是文字描述与少量图片分析
提供完整代码,可不学习直接使用 博主空间
https://blog.csdn.net/JOElib?spm=1011.2266.3001.5343强哥的查找算法
https://blog.csdn.net/JOElib/article/details/123848806?spm=1001.2014.3001.5501
定义概述
路径
路径是从根节点如何走到目标节点,中间的过程被称为路径
路径的长度
路径的长度是在走的过程中,中间经历的节点数,如第L层的节点的路径长度是L-1
节点的权
节点的权是节点中被赋予了某个特殊意义的数字,该数字被称为节点的权
带权路径长度
用节点的路径大小乘上节点的权,乘积被称为带权路径
树的带权路径长度
树的带权路径表示所有叶子节点的带权路径之和,和被称为树的带权路径,树的带权路径的简写是WPL
哈夫曼树
哈夫曼树是一种特殊的二叉树,这种二叉树的WPL最小,即树的带权路径最小的二叉树就被称为哈夫曼树,其特点是越大的权离节点越近。
存储方式
定长编码
以字符串为例,按照每一个字符的ASCII码对应的二进制补码,原封不动的存储,被我们称为定长编码
变长编码
以字符串为例,先统计里面每一个字符出现的元素个数,先将他们排序,按照排序顺序,给较大的ASCII编一个较小的二进制编码,利用这些编码去存储
缺点:该编码读取会产生歧异,如01 与011编码,他们两个前缀相同,读取的是一个一个去读的,假设我们想要的是011,系统可能读成了01
前缀编码
前缀编码,即每一个变长编码的前缀都不一样,可避免上述读取产生歧异的缺点
哈夫曼编码
哈夫曼编码是一种特殊的前缀编码,根据路径从而得到对应的哈夫曼编码
完整代码
package datastructure.chapter04.tree.huffman.huffmancoding;/ * Created with IntelliJ IDEA. * Description: * User: 江骏杰 * Date: 2022-03-29 * Time: 21:38 */public class Node implements Comparable { public Byte ascii; public int value; public Node left; public Node right; public Node(Byte ascii,int value) { this.ascii = ascii; this.value = value; } public String toString() { return "Node[ascii = " + ascii + " value = " + value + "]"; } public int compareTo(Node node) { return value - node.value; }}package datastructure.chapter04.tree.huffman.huffmancoding;import java.io.*;import java.util.*;/ * Created with IntelliJ IDEA. * Description: * User: 江骏杰 * Date: 2022-03-29 * Time: 21:37 */public class HuffmanCode implements Serializable { @Serial private static final long serialVersionUID = 4420; private static final HashMap HUFFMAN_CODES = new HashMap(); private static final StringBuffer STRING_BUFFER = new StringBuffer(); private static int lastCount; public static byte[] huffmanCodeZip(String s) { var content = s.getBytes(); var list = getNodes(content); var huffmanTreeRoot = createHuffmanTree(list); getCodes(huffmanTreeRoot,"",STRING_BUFFER); return zip(content,HUFFMAN_CODES); } public static byte[] huffmanCodeZip(byte[] content) { var list = getNodes(content); var huffmanTreeRoot = createHuffmanTree(list); getCodes(huffmanTreeRoot,"",STRING_BUFFER); return zip(content,HUFFMAN_CODES); } public static List getNodes(byte[] content) { var map = new HashMap(); var list = new ArrayList(); for (var item : content) { var count = map.get(item); if (count == null) { map.put(item,1); }else { map.put(item,++count); } } for (var entry : map.entrySet()) { list.add(new Node(entry.getKey(),entry.getValue())); } return list; } public static Node createHuffmanTree(List list) { while (list.size() > 1) { Collections.sort(list); var leftNode = list.get(0); var rightNode = list.get(1); var parentNode = new Node(null, leftNode.value + rightNode.value); parentNode.left = leftNode; parentNode.right = rightNode; list.add(parentNode); list.remove(1); list.remove(0); } return list.get(0); } public static void getCodes(Node node,String code,StringBuffer strBuf) { var curStrBuf = new StringBuffer(strBuf); curStrBuf.append(code); if (node != null) { if (node.ascii == null) { getCodes(node.left,"0",curStrBuf); getCodes(node.right,"1",curStrBuf); }else { HUFFMAN_CODES.put(node.ascii,curStrBuf.toString()); } } } public static byte[] zip(byte[] content,Map huffmanCodes) { var strBuf = new StringBuilder(); for (var item : content) { strBuf.append(huffmanCodes.get(item)); } var len = (strBuf.length() + 7) / 8; var huffmanCodesBytes = new byte[len]; String str; var index = 0; for (int i = 0; i < strBuf.length(); i+= 8) { if (i + 8 < strBuf.length()) { str = strBuf.substring(i,i+8); }else { str = strBuf.substring(i); lastCount = str.length(); } huffmanCodesBytes[index++] = (byte) Integer.parseInt(str,2); } return huffmanCodesBytes; } public static String byteToString(byte b,boolean flag) { int temp = b; temp |= 256; String str = Integer.toBinaryString(temp); if (flag) { return str.substring(str.length() - 8); } return str.substring(str.length() - lastCount); } public static byte[] decode(byte[] huffmanCodesBytes,Map huffmanCodes) { var strBil = new StringBuilder(); for (int i = 0; i < huffmanCodesBytes.length; i++) { boolean flag = (i != huffmanCodesBytes.length -1); strBil.append(byteToString(huffmanCodesBytes[i],flag)); } var map = new HashMap(); for (var entry : huffmanCodes.entrySet()) { map.put(entry.getValue(),entry.getKey()); } var list = new ArrayList(); for (int i = 0; i < strBil.length();) { Byte b ; var count = 1; while ((b = map.get(strBil.substring(i, i + count))) == null) { count++; } list.add(b); i += count; } var content = new byte[list.size()]; for (int i = 0; i < content.length; i++) { content[i] =list.get(i); } return content; } public static String decode(byte[] huffmanBytes) { return new String(decode(huffmanBytes,HUFFMAN_CODES)); } public static void fileZip(String srcPath,String desPath) { try(var fis = new FileInputStream(srcPath); var fos = new FileOutputStream(desPath); var oos = new ObjectOutputStream(fos) ) { var content = new byte[fis.available()]; fis.read(content); byte[] huffmanBytes = huffmanCodeZip(content); oos.writeObject(huffmanBytes); oos.writeObject(HUFFMAN_CODES); oos.writeObject(lastCount); oos.flush(); fos.flush(); }catch (Exception e) { e.printStackTrace(); } } public static void fileDeZip(String zipPath,String desPath) { try(var fis = new FileInputStream(zipPath); var ois = new ObjectInputStream(fis); var fos = new FileOutputStream(desPath) ) { var huffmanBytes = (byte[])ois.readObject(); var huffmanCodes = (Map)ois.readObject(); lastCount = (int)ois.readObject(); var content = decode(huffmanBytes,huffmanCodes); fos.write(content); fos.flush(); }catch (Exception e) { e.printStackTrace(); } }}
结论
欲要完成一个代码,必先了解他的原理,通过这次原理,我们可以窥探一二
下篇,我们会开始创建哈夫曼树