C#使用正则表达式移除所有的Html标记,返回纯文本
作者:C/S框架网|www.cscode.net  发布日期:2020/02/25 15:19:10
  C#使用正则表达式移除所有的Html标记,返回纯文本


C# Code:

/// <summary>
/// Strips all HTML markup from a string using regular expressions and returns the remaining text.
/// </summary>
/// <remarks>
/// Processing order: script blocks are removed together with their content, then every
/// remaining tag, then line breaks followed by whitespace, then comment remnants. A fixed set
/// of character entities is decoded in a single pass (any other numeric reference is dropped),
/// leftover angle brackets and CRLF pairs are removed, and the result is HTML-encoded and trimmed.
/// </remarks>
/// <param name="HtmlString">The HTML fragment to clean. Null or empty input yields an empty string.</param>
/// <returns>The text content with markup removed, HTML-encoded and trimmed.</returns>
private static string RemoveHTML(string HtmlString)
{
    if (string.IsNullOrEmpty(HtmlString))
    {
        return string.Empty;
    }

    string text = HtmlString;

    // Remove script blocks including their bodies. Singleline lets '.' cross line breaks,
    // otherwise the content of a multi-line script would survive as plain text.
    text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    text = Regex.Replace(text, @"<(.[^>]*)>", "");

    // Remove a line break together with the whitespace that follows it.
    text = Regex.Replace(text, @"([\r\n])[\s]+", "");

    // Remove comment remnants: stray terminators, then an unterminated opener up to end of line.
    text = Regex.Replace(text, @"-->", "");
    text = Regex.Replace(text, @"<!--.*", "");

    // Decode the supported entities in ONE pass so that already-decoded output is never
    // decoded a second time (e.g. "&amp;lt;" must become the text "&lt;", not "<").
    text = Regex.Replace(
        text,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeHtmlEntity),
        RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned to take effect.
    text = text.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // WebUtility.HtmlEncode does not need an active HttpContext, so this also works outside
    // a web request. NOTE(review): if the application configures a custom HttpEncoder,
    // confirm that bypassing it here is acceptable.
    return System.Net.WebUtility.HtmlEncode(text).Trim();
}

/// <summary>
/// Maps one matched character entity to its replacement text for <see cref="RemoveHTML"/>.
/// </summary>
/// <param name="match">A match whose first group holds the entity name or "#" plus decimal code.</param>
/// <returns>The decoded character, or an empty string for unsupported numeric references.</returns>
private static string DecodeHtmlEntity(Match match)
{
    switch (match.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Any other numeric character reference is dropped.
            return "";
    }
}

//来源:C/S框架网(www.csframework.com) QQ:23404761



上一篇 下一篇