> 技术文档 > C# OpenXML 打开加密的 Word 文档并读取内容

C# OpenXML 打开加密的 Word 文档并读取内容

using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

// NOTE: the members below are a snippet; they are meant to live inside a class of the host project.
// LogManager is a project-local logger that is referenced but not defined here.

/// <summary>Signature bytes that open every OLE compound file (see MS-CFB).</summary>
private static readonly byte[] CompoundFileSignature =
{
    0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1
};

/// <summary>Matches any run of whitespace (spaces, tabs, line breaks).</summary>
private static readonly Regex WhitespaceRunRegex = new Regex(@"\s+", RegexOptions.Compiled);

/// <summary>Matches the control characters that are stripped from extracted text.</summary>
private static readonly Regex ControlCharRegex =
    new Regex(@"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", RegexOptions.Compiled);

/// <summary>
/// Reads the body text of a Word document with the Open XML SDK (replacement for the Aspose path).
/// Headers and footers are not part of the body and are therefore not included.
/// </summary>
/// <param name="path">Path of the .docx file.</param>
/// <param name="password">
/// Kept for signature compatibility only. The Open XML SDK has no password support
/// (<c>OpenSettings</c> exposes no <c>Password</c> property), so an encrypted file cannot be
/// opened here; it is reported through the log and the method returns <c>null</c>.
/// </param>
/// <returns>
/// The cleaned text, cut off before the last occurrence of "限公司第" when that marker is found
/// after position 0; <c>null</c> when the document has no body or any error occurs.
/// </returns>
public static string GetWordContentByOpenXml(string path, string password)
{
    try
    {
        using (var document = OpenForReading(path))
        {
            var body = document.MainDocumentPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContent(body, contentBuilder);

            string content = CleanContent(contentBuilder.ToString());
            return TruncateAtLastMarker(content, "限公司第");
        }
    }
    catch (Exception ex)
    {
        // Log type + message + stack trace; the stack trace alone hides what actually failed.
        LogManager.WriteError("GetWordContentByOpenXml()", ex.ToString());
        return null;
    }
}

/// <summary>
/// Opens a document read-only, failing early with a descriptive error when the file is an
/// OLE compound file instead of a plain Open XML (ZIP) package.
/// </summary>
/// <param name="path">Path of the file to open.</param>
/// <returns>The opened, read-only document. The caller owns and must dispose it.</returns>
/// <exception cref="NotSupportedException">The file is stored in an OLE compound container.</exception>
private static WordprocessingDocument OpenForReading(string path)
{
    if (IsCompoundFileContainer(path))
    {
        throw new NotSupportedException(
            "The file is an OLE compound file (password-encrypted or legacy binary document). " +
            "The Open XML SDK cannot decrypt it; decrypt the package before reading it.");
    }

    return WordprocessingDocument.Open(path, false);
}

/// <summary>
/// Tells whether the file starts with the OLE compound file signature.
/// Password-encrypted Office Open XML files are wrapped in such a container (MS-OFFCRYPTO).
/// NOTE(review): legacy binary .doc files carry the same signature, so a match means
/// "not directly readable as Open XML" rather than strictly "encrypted".
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns><c>true</c> when the first bytes equal the compound file signature.</returns>
private static bool IsCompoundFileContainer(string path)
{
    using (var stream = File.OpenRead(path))
    {
        var header = new byte[CompoundFileSignature.Length];
        int filled = 0;

        // Stream.Read may return fewer bytes than requested, so loop until the header is full.
        while (filled < header.Length)
        {
            int read = stream.Read(header, filled, header.Length - filled);
            if (read == 0)
            {
                return false; // file is shorter than the signature
            }

            filled += read;
        }

        return header.SequenceEqual(CompoundFileSignature);
    }
}

/// <summary>
/// Cuts <paramref name="content"/> before the last occurrence of <paramref name="marker"/>.
/// </summary>
/// <param name="content">Cleaned document text.</param>
/// <param name="marker">Marker text to search for (ordinal comparison).</param>
/// <returns>
/// The trimmed prefix when the marker is found at an index greater than 0;
/// otherwise <paramref name="content"/> unchanged.
/// </returns>
private static string TruncateAtLastMarker(string content, string marker)
{
    int index = content.LastIndexOf(marker, StringComparison.Ordinal);
    return index > 0 ? content.Substring(0, index).Trim() : content;
}

/// <summary>
/// Appends the text of every element of the document body; hyperlink text is not included.
/// </summary>
/// <param name="body">Document body.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void ExtractBodyContent(Body body, StringBuilder contentBuilder)
{
    ExtractBodyContentAdvanced(body, contentBuilder, false);
}

/// <summary>
/// Appends the text of one element, descending into container elements; hyperlink text is not included.
/// </summary>
/// <param name="element">Open XML element to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void ExtractElementContent(OpenXmlElement element, StringBuilder contentBuilder)
{
    ExtractElementContentAdvanced(element, contentBuilder, false);
}

/// <summary>
/// Appends the text of the runs that are direct children of a paragraph; hyperlink text is not included.
/// </summary>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void ExtractParagraphContent(Paragraph paragraph, StringBuilder contentBuilder)
{
    ExtractParagraphContentAdvanced(paragraph, contentBuilder, false);
}

/// <summary>
/// Appends the text of a table: a tab after every cell and a line break after every row;
/// hyperlink text is not included.
/// </summary>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void ExtractTableContent(Table table, StringBuilder contentBuilder)
{
    ExtractTableContentAdvanced(table, contentBuilder, false);
}

/// <summary>
/// Normalizes extracted text: collapses every whitespace run to one space, removes the Aspose
/// evaluation watermark text and strips control characters.
/// </summary>
/// <param name="content">Raw extracted text.</param>
/// <returns>The cleaned text; an empty string when <paramref name="content"/> is null or empty.</returns>
private static string CleanContent(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // Every whitespace run (including tabs and line breaks) becomes a single space, so the
    // result is one line. No separate blank-line pass is needed after this step.
    content = WhitespaceRunRegex.Replace(content, " ").Trim();

    // Open XML output never contains this watermark; the removal is kept for compatibility
    // with text that was produced through Aspose.
    content = content.Replace("EvaluationOnly.CreatedwithAspose.Words.Copyright2003-2024AsposePtyLtd.", "");

    content = ControlCharRegex.Replace(content, "");
    return content.Trim();
}

/// <summary>
/// Checks whether a document needs a password before it can be read.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>
/// <c>true</c> when the file is stored in an OLE compound container (how encrypted packages
/// are stored) or when the SDK reports a password/encryption/protection problem;
/// <c>false</c> when the file opens normally or fails for an unrelated reason
/// (that failure is logged).
/// </returns>
public static bool IsPasswordRequired(string path)
{
    try
    {
        if (IsCompoundFileContainer(path))
        {
            return true;
        }

        using (WordprocessingDocument.Open(path, false))
        {
            // Opened without a password, so none is required.
            return false;
        }
    }
    catch (OpenXmlPackageException ex)
    {
        return MentionsProtection(ex.Message);
    }
    catch (Exception ex)
    {
        // A missing, locked or corrupt file is not evidence of password protection.
        LogManager.WriteError("IsPasswordRequired()", ex.ToString());
        return false;
    }
}

/// <summary>
/// Tells whether an error message mentions a password, encryption or protection (case-insensitive).
/// </summary>
/// <param name="message">Exception message to inspect; may be null.</param>
/// <returns><c>true</c> when one of the keywords occurs in the message.</returns>
private static bool MentionsProtection(string message)
{
    if (string.IsNullOrEmpty(message))
    {
        return false;
    }

    return message.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0
        || message.IndexOf("encrypted", StringComparison.OrdinalIgnoreCase) >= 0
        || message.IndexOf("protected", StringComparison.OrdinalIgnoreCase) >= 0;
}

/// <summary>
/// Extended reader: same as <see cref="GetWordContentByOpenXml"/> but can include hyperlink
/// text and footnotes, and truncates at the marker "公司第".
/// </summary>
/// <param name="path">Path of the .docx file.</param>
/// <param name="password">
/// Kept for signature compatibility only; the Open XML SDK cannot open encrypted files,
/// so an encrypted file is logged and the method returns <c>null</c>.
/// </param>
/// <param name="includeHyperlinks">When true, text inside hyperlinks is included.</param>
/// <param name="includeFootnotes">When true, footnote text is appended after the body text.</param>
/// <returns>
/// The cleaned text, cut off before the last occurrence of "公司第" when that marker is found
/// after position 0; <c>null</c> when the document has no body or any error occurs.
/// </returns>
public static string GetWordContentByOpenXmlAdvanced(string path, string password, bool includeHyperlinks = false, bool includeFootnotes = false)
{
    try
    {
        using (var document = OpenForReading(path))
        {
            var mainPart = document.MainDocumentPart;
            var body = mainPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContentAdvanced(body, contentBuilder, includeHyperlinks);

            if (includeFootnotes && mainPart.FootnotesPart != null)
            {
                ExtractFootnotesContent(mainPart.FootnotesPart, contentBuilder);
            }

            string content = CleanContent(contentBuilder.ToString());
            return TruncateAtLastMarker(content, "公司第");
        }
    }
    catch (Exception ex)
    {
        LogManager.WriteError("GetWordContentByOpenXmlAdvanced()", ex.ToString());
        return null;
    }
}

/// <summary>
/// Appends the text of every element of the document body.
/// </summary>
/// <param name="body">Document body.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
/// <param name="includeHyperlinks">When true, text inside hyperlinks is included.</param>
private static void ExtractBodyContentAdvanced(Body body, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in body.Elements())
    {
        ExtractElementContentAdvanced(element, contentBuilder, includeHyperlinks);
    }
}

/// <summary>
/// Appends the text of one element. Paragraphs are followed by a line break, tables are
/// delegated to the table reader, section properties are skipped and any other element is
/// treated as a container whose children are visited recursively.
/// </summary>
/// <param name="element">Open XML element to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
/// <param name="includeHyperlinks">When true, text inside hyperlinks is included.</param>
private static void ExtractElementContentAdvanced(OpenXmlElement element, StringBuilder contentBuilder, bool includeHyperlinks)
{
    switch (element)
    {
        case Paragraph paragraph:
            ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            contentBuilder.AppendLine();
            break;

        case Table table:
            ExtractTableContentAdvanced(table, contentBuilder, includeHyperlinks);
            break;

        case SectionProperties _:
            // Section properties carry layout and header/footer references, not body text.
            break;

        default:
            // Recurse on the element itself. Wrapping a child in a new Body would try to
            // re-parent an element that already belongs to a tree, which the SDK rejects.
            foreach (var child in element.Elements())
            {
                ExtractElementContentAdvanced(child, contentBuilder, includeHyperlinks);
            }

            break;
    }
}

/// <summary>
/// Appends the text of a paragraph: its direct runs and, optionally, the runs inside its
/// direct hyperlinks.
/// </summary>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
/// <param name="includeHyperlinks">When true, text inside hyperlinks is included.</param>
private static void ExtractParagraphContentAdvanced(Paragraph paragraph, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in paragraph.Elements())
    {
        if (element is Run run)
        {
            AppendRunContent(run, contentBuilder);
        }
        else if (includeHyperlinks && element is Hyperlink hyperlink)
        {
            foreach (var linkRun in hyperlink.Elements<Run>())
            {
                AppendRunContent(linkRun, contentBuilder);
            }
        }
    }
}

/// <summary>
/// Appends the content of one run in document order: text as-is, a tab character for each
/// tab element and a line break for each break element.
/// </summary>
/// <param name="run">Run to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void AppendRunContent(Run run, StringBuilder contentBuilder)
{
    // Walk the children once, in order, so text/tab/break stay interleaved as authored.
    foreach (var child in run.Elements())
    {
        switch (child)
        {
            case Text text:
                contentBuilder.Append(text.Text);
                break;

            case TabChar _:
                contentBuilder.Append("\t");
                break;

            case Break _:
                contentBuilder.AppendLine();
                break;
        }
    }
}

/// <summary>
/// Appends the text of a table: the paragraphs of each cell back to back, a tab after every
/// cell and a line break after every row.
/// </summary>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
/// <param name="includeHyperlinks">When true, text inside hyperlinks is included.</param>
private static void ExtractTableContentAdvanced(Table table, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var row in table.Elements<TableRow>())
    {
        foreach (var cell in row.Elements<TableCell>())
        {
            foreach (var paragraph in cell.Elements<Paragraph>())
            {
                ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            }

            contentBuilder.Append("\t"); // cell separator
        }

        contentBuilder.AppendLine(); // row separator
    }
}

/// <summary>
/// Appends a "--- 脚注 ---" heading followed by the text of every footnote paragraph,
/// one line per paragraph. Does nothing when the part has no footnotes root element.
/// </summary>
/// <param name="footnotesPart">Footnotes part of the main document.</param>
/// <param name="contentBuilder">Receives the extracted text.</param>
private static void ExtractFootnotesContent(FootnotesPart footnotesPart, StringBuilder contentBuilder)
{
    if (footnotesPart.Footnotes == null)
    {
        return;
    }

    contentBuilder.AppendLine("\n--- 脚注 ---");

    foreach (var footnote in footnotesPart.Footnotes.Elements<Footnote>())
    {
        foreach (var paragraph in footnote.Elements<Paragraph>())
        {
            ExtractParagraphContent(paragraph, contentBuilder);
            contentBuilder.AppendLine();
        }
    }
}