Anybody who looked at the source code for the previous post might have noticed that I have indeed succeeded in cleaning up my syntax highlighter so that it produces the UI in a more pleasant manner. I’ve also included stylings for more language-specific elements (such as namespaces, classes, and operators). The line-handler is also much simpler and much more efficient. And finally, the spaces and the tabs (i.e., the presentation) are no longer present in the actual code (which is how it should be; at any rate, a run of ten spaces doesn’t even show up in the rendered HTML, so we’re better off getting rid of them).

The Code:

  using System;
  using System.Globalization;
  using System.Text;
  using System.Text.RegularExpressions;

  namespace SG.Net.Utilities.SyntaxHighlighter
  {
      /// <summary>
      /// Turns C# source text into an HTML ordered list (one list item per source
      /// line) in which strings, comments, operators, keywords, namespaces, classes
      /// and preprocessor directives are wrapped in classed span elements.
      /// </summary>
      /// <remarks>
      /// The token patterns look for the HTML entities of the double quote,
      /// ampersand, less-than and greater-than characters, so the input is expected
      /// to be HTML-encoded before it is passed to <see cref="Highlight"/>.
      /// </remarks>
      public class CsharpSyntaxHighlighter
      {
          #region "Data and Data Access"

          // Left padding added to a list item per indentation level.
          private const int IndentWidthInPixels = 25;

          // Number of leading spaces that count as one indentation level.
          private const int SpacesPerIndentLevel = 4;

          // Operators in their HTML-encoded form. The order here is irrelevant:
          // JoinAlternatives sorts longest-first before building the pattern.
          private static readonly string[] Operators = new string[]
          {
              "!", "!=", "%", "%=", "&amp;", "&amp;&amp;", "&amp;=", "(", ")",
              "*", "*=", "+", "++", "+=", "-", "--", "-=", "-&gt;", ".", "/", "/=",
              ":", "&lt;", "&lt;&lt;", "&lt;&lt;=", "&lt;=", "=", "==",
              "&gt;", "&gt;=", "&gt;&gt;", "&gt;&gt;=", "?", "[", "]", "^", "^=",
              "{", "|", "|=", "||", "}", "~"
          };

          // The reserved words of the C# language.
          private static readonly string[] Keywords = new string[]
          {
              "abstract", "as", "base", "bool", "break", "byte", "case", "catch",
              "char", "checked", "class", "const", "continue", "decimal", "default",
              "delegate", "do", "double", "else", "enum", "event", "explicit",
              "extern", "false", "finally", "fixed", "float", "for", "foreach",
              "goto", "if", "implicit", "in", "int", "interface", "internal", "is",
              "lock", "long", "namespace", "new", "null", "object", "operator",
              "out", "override", "params", "private", "protected", "public",
              "readonly", "ref", "return", "sbyte", "sealed", "short", "sizeof",
              "stackalloc", "static", "string", "struct", "switch", "this", "throw",
              "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe",
              "ushort", "using", "virtual", "void", "volatile", "while"
          };

          // NOTE: the article this class was published in omitted its namespace
          // list for brevity; the entries below are a representative starting set
          // and are meant to be extended.
          private static readonly string[] Namespaces = new string[]
          {
              "System", "System.Collections", "System.Collections.Generic",
              "System.Data", "System.Diagnostics", "System.Globalization",
              "System.IO", "System.Net", "System.Text",
              "System.Text.RegularExpressions", "System.Threading", "System.Web",
              "System.Xml"
          };

          // NOTE: the article also omitted its class list; same caveat as above.
          private static readonly string[] Classes = new string[]
          {
              "ArgumentException", "ArgumentNullException", "Array", "ArrayList",
              "Boolean", "Byte", "Char", "Console", "Convert", "DateTime", "Decimal",
              "Dictionary", "Double", "Environment", "Exception", "Guid",
              "Hashtable", "Int16", "Int32", "Int64", "List", "Match",
              "MatchEvaluator", "Math", "Object", "Regex", "RegexOptions", "Single",
              "String", "StringBuilder", "TimeSpan", "Type"
          };

          // Directive names without the leading '#'.
          private static readonly string[] PreprocessorDirectives = new string[]
          {
              "if", "else", "elif", "endif", "define", "undef", "warning", "error",
              "line", "region", "endregion", "pragma"
          };

          // Every name below is both a named group in the token pattern and the CSS
          // class written on the span. Named groups are used instead of group
          // numbers so that adding parentheses to one sub-pattern can never shift
          // the meaning of another.
          private static readonly string[] TokenClasses = new string[]
          {
              "str", "fdc", "slc", "mlc", "op", "kwd", "nsp", "cls", "preproc"
          };

          // Built once and shared by all instances. This field MUST stay below the
          // arrays above: static initializers run in textual order and
          // MakePatterns() reads those arrays.
          private static readonly Regex TokenPattern =
              new Regex(MakePatterns(), RegexOptions.Singleline);

          private bool useRegions = false;

          private bool useLineNumbers = true;

          /// <summary>
          /// Gets or sets the region flag. The value is stored but is not
          /// consulted by <see cref="Highlight"/>.
          /// </summary>
          public bool UseRegions
          {
              get { return useRegions; }

              set { useRegions = value; }
          }

          /// <summary>
          /// Gets or sets the line-number flag. The value is stored but is not
          /// consulted by <see cref="Highlight"/>, which always emits an ordered list.
          /// </summary>
          public bool UseLineNumbers
          {
              get { return useLineNumbers; }

              set { useLineNumbers = value; }
          }

          #endregion

          /// <summary>
          /// Highlights the given (HTML-encoded) C# source.
          /// </summary>
          /// <param name="code">The source text to highlight.</param>
          /// <returns>
          /// An <c>ol</c> element with class <c>code</c> containing one <c>li</c>
          /// per input line. Leading tabs and spaces are removed from each line and
          /// expressed as left padding; whitespace-only lines become a
          /// non-breaking space so that they still render.
          /// </returns>
          /// <exception cref="ArgumentNullException">
          /// <paramref name="code"/> is <c>null</c>.
          /// </exception>
          public string Highlight(string code)
          {
              if (code == null)
              {
                  throw new ArgumentNullException("code");
              }

              // Work on '\n' only so that a stray '\r' never ends up inside a span.
              string normalized = code.Replace("\r\n", "\n").Replace('\r', '\n');

              // Pass 1: wrap every recognised token in a span. Tokens that run
              // over several lines are closed and reopened at each line break
              // (see WrapInSpans), so every line is well-formed on its own.
              string marked = TokenPattern.Replace(normalized, new MatchEvaluator(HandleMatch));

              // Pass 2: one list item per line.
              string[] lines = marked.Split('\n');
              StringBuilder html = new StringBuilder(marked.Length + lines.Length * 48);

              html.Append("<ol class = \"code\">\n");

              for (int i = 0; i < lines.Length; i++)
              {
                  AppendListItem(html, lines[i]);
                  html.Append('\n');
              }

              html.Append("</ol>\n");

              return html.ToString();
          }

          #region "Helper Methods"

          /// <summary>
          /// Builds the single alternation that recognises every token type.
          /// Alternatives are tried left to right at each position, so strings and
          /// comments take precedence over the language-specific patterns.
          /// </summary>
          private static string MakePatterns()
          {
              StringBuilder patterns = new StringBuilder();

              patterns.Append(@"(?<str>");

              // Character literals: a backslash escapes the character after it.
              patterns.Append(@"'(?:\\[^\n]|[^\\'\n])*'");

              // Regular strings. A backslash escapes whatever follows, so a
              // literal ending in an escaped backslash still terminates.
              patterns.Append(@"|(?<!@)&quot;(?:\\(?:&quot;|(?!&quot;)[^\n])|(?!&quot;)[^\\\n])*&quot;");

              // Verbatim ("hard") strings: may span lines, backslash is not an
              // escape, and a doubled quote stands for one quote.
              patterns.Append(@"|@&quot;(?:&quot;&quot;|(?!&quot;).)*&quot;");

              patterns.Append(@")|");

              // Documentation comments; listed before plain comments so that
              // "///" is never taken for "//".
              patterns.Append(@"(?<fdc>///[^\n]*)|");

              // Single-line comments.
              patterns.Append(@"(?<slc>//[^\n]*)|");

              // Multi-line comments.
              patterns.Append(@"(?<mlc>/\*.*?\*/)|");

              // Keywords, operators, namespaces, classes and directives.
              patterns.Append(GetSpecialSyntax());

              return patterns.ToString();
          }

          /// <summary>
          /// Concatenates the language-specific sub-patterns into one alternation.
          /// </summary>
          private static string GetSpecialSyntax()
          {
              return GetOperators() + "|"
                  + GetKeywords() + "|"
                  + GetNamespaces() + "|"
                  + GetClasses() + "|"
                  + GetPreprocessorDirectives();
          }

          /// <summary>
          /// Pattern for operators that have whitespace on both sides. The
          /// whitespace is asserted with look-arounds rather than consumed, so it
          /// stays outside the span and two operators separated by a single space
          /// can both match.
          /// </summary>
          private static string GetOperators()
          {
              return @"(?<op>(?<=\s)(?:" + JoinAlternatives(Operators) + @")(?=\s))";
          }

          /// <summary>Pattern for whole-word keywords.</summary>
          private static string GetKeywords()
          {
              return @"(?<kwd>\b(?:" + JoinAlternatives(Keywords) + @")\b)";
          }

          /// <summary>Pattern for the known namespace names.</summary>
          private static string GetNamespaces()
          {
              return @"(?<nsp>\b(?:" + JoinAlternatives(Namespaces) + @")\b)";
          }

          /// <summary>
          /// Pattern for the known class names, optionally followed by the
          /// nullable marker '?'.
          /// </summary>
          private static string GetClasses()
          {
              return @"(?<cls>\b(?:" + JoinAlternatives(Classes) + @")\b\??)";
          }

          /// <summary>
          /// Pattern for preprocessor directives. A word boundary cannot be used
          /// in front of '#' because '#' is not a word character: "\b#" matches
          /// only when a word character PRECEDES the '#', which is exactly the
          /// "blah#region" case that must be rejected. The look-behind instead
          /// requires start of input or whitespace before the '#'.
          /// </summary>
          private static string GetPreprocessorDirectives()
          {
              return @"(?<preproc>(?<!\S)#(?:" + JoinAlternatives(PreprocessorDirectives) + @")\b)";
          }

          /// <summary>
          /// Escapes each token for literal use in a regular expression and joins
          /// them with '|', longest first, so that a token is never shadowed by
          /// one of its own prefixes (for example "System" and "System.Text").
          /// </summary>
          private static string JoinAlternatives(string[] tokens)
          {
              string[] escaped = new string[tokens.Length];

              for (int i = 0; i < tokens.Length; i++)
              {
                  escaped[i] = Regex.Escape(tokens[i]);
              }

              Array.Sort(escaped, new Comparison<string>(CompareByDescendingLength));

              return String.Join("|", escaped);
          }

          private static int CompareByDescendingLength(string left, string right)
          {
              return right.Length.CompareTo(left.Length);
          }

          /// <summary>
          /// Wraps a token in a span of the given class. When the token contains
          /// line breaks, each line gets its own span; leading tabs and spaces of
          /// a line are left outside the span so the line pass can still turn
          /// them into padding, and whitespace-only lines get no span at all.
          /// </summary>
          private static string WrapInSpans(string cssClass, string token)
          {
              string[] segments = token.Split('\n');
              StringBuilder wrapped = new StringBuilder(token.Length + segments.Length * 32);

              for (int i = 0; i < segments.Length; i++)
              {
                  if (i > 0)
                  {
                      wrapped.Append('\n');
                  }

                  string segment = segments[i];
                  int start = 0;

                  while (start < segment.Length && (segment[start] == ' ' || segment[start] == '\t'))
                  {
                      start++;
                  }

                  wrapped.Append(segment, 0, start);

                  if (start < segment.Length)
                  {
                      wrapped.Append("<span class = \"").Append(cssClass).Append("\">");
                      wrapped.Append(segment, start, segment.Length - start);
                      wrapped.Append("</span>");
                  }
              }

              return wrapped.ToString();
          }

          /// <summary>
          /// Appends one list item for one line of marked-up code. Each leading
          /// tab and each full group of four leading spaces counts as one
          /// indentation level of 25 pixels.
          /// </summary>
          private static void AppendListItem(StringBuilder output, string line)
          {
              int tabs = 0;
              int spaces = 0;
              int start = 0;

              while (start < line.Length && (line[start] == '\t' || line[start] == ' '))
              {
                  if (line[start] == '\t')
                  {
                      tabs++;
                  }
                  else
                  {
                      spaces++;
                  }

                  start++;
              }

              string content = line.Substring(start).TrimEnd();

              // An empty list item does not show up, so give it visible content.
              if (content.Length == 0)
              {
                  output.Append("<li>&nbsp;</li>");
                  return;
              }

              int levels = tabs + (spaces / SpacesPerIndentLevel);

              if (levels > 0)
              {
                  // Invariant culture: the number is CSS, not text for a reader.
                  output.AppendFormat(
                      CultureInfo.InvariantCulture,
                      "<li style=\"padding-left:{0}px;\">",
                      levels * IndentWidthInPixels);
              }
              else
              {
                  output.Append("<li>");
              }

              output.Append(content).Append("</li>");
          }

          #endregion

          #region "Match Handlers"

          /// <summary>
          /// Match evaluator for the token pattern: finds which named group
          /// matched and wraps the matched text in a span of that class.
          /// </summary>
          private static string HandleMatch(Match m)
          {
              for (int i = 0; i < TokenClasses.Length; i++)
              {
                  if (m.Groups[TokenClasses[i]].Success)
                  {
                      return WrapInSpans(TokenClasses[i], m.Value);
                  }
              }

              return m.Value;
          }

          #endregion
      }
  }