C#使用正则表达式移除所有的Html标记,返回纯文本-C/S开发框架
作者:csframework|C/S框架网  发布日期:2021/12/30 18:58:58

  C#使用正则表达式移除所有的Html标记,返回纯文本

C# Code:

C# 全选
/// <summary>
/// Strips HTML markup from a string using regular expressions and returns the remaining text.
/// </summary>
/// <remarks>
/// Processing order: script blocks are dropped, then every tag, then line breaks followed by
/// whitespace and comment remnants. A handful of common entities are decoded, all other numeric
/// entities are discarded, and any angle brackets or CRLF pairs still present are removed.
/// The result is finally HTML-encoded and trimmed, so characters such as <c>&amp;</c> and
/// <c>"</c> appear in the return value in their entity form.
/// </remarks>
/// <param name="HtmlString">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The HTML-encoded, trimmed text content; an empty string when <paramref name="HtmlString"/>
/// is null or empty.
/// </returns>
private static string RemoveHTML(string HtmlString)
{
    // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(HtmlString))
    {
        return string.Empty;
    }

    const RegexOptions Options = RegexOptions.IgnoreCase;

    // Remove script blocks. Singleline lets '.' match line breaks so that scripts spanning
    // several lines are removed as a whole instead of leaking their source into the text.
    HtmlString = Regex.Replace(
        HtmlString,
        @"<script[^>]*?>.*?</script>",
        "",
        Options | RegexOptions.Singleline);

    // Remove every remaining tag.
    HtmlString = Regex.Replace(HtmlString, @"<(.[^>]*)>", "", Options);

    // Remove line breaks together with the whitespace that follows them.
    HtmlString = Regex.Replace(HtmlString, @"([\r\n])[\s]+", "", Options);

    // Remove comment remnants: a dangling terminator, or an unterminated comment opener
    // (which discards the rest of that line).
    HtmlString = Regex.Replace(HtmlString, @"-->", "", Options);
    HtmlString = Regex.Replace(HtmlString, @"<!--.*", "", Options);

    // Decode the entities this helper knows about (named form or decimal form).
    HtmlString = Regex.Replace(HtmlString, @"&(quot|#34);", "\"", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(amp|#38);", "&", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(lt|#60);", "<", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(gt|#62);", ">", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(nbsp|#160);", " ", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(iexcl|#161);", "\u00A1", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(cent|#162);", "\u00A2", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(pound|#163);", "\u00A3", Options);
    HtmlString = Regex.Replace(HtmlString, @"&(copy|#169);", "\u00A9", Options);

    // Any other decimal numeric entity is dropped.
    HtmlString = Regex.Replace(HtmlString, @"&#(\d+);", "", Options);

    // string.Replace returns a new string; the result must be assigned back, otherwise
    // these clean-up steps have no effect.
    HtmlString = HtmlString.Replace("<", "");
    HtmlString = HtmlString.Replace(">", "");
    HtmlString = HtmlString.Replace("\r\n", "");

    // Encode with the static WebUtility helper so the method also works when there is no
    // current HTTP request (HttpContext.Current is null on background threads and in tests).
    return System.Net.WebUtility.HtmlEncode(HtmlString).Trim();
}

//来源:C/S框架网(www.csframework.com) QQ:23404761



上一篇 下一篇