Regular expressions are a pain in the neck. Unfortunately, they're also very useful. So I bullied my uncooperative brain into studying regular expressions and found myself hooked — once I got out of the "Why the @&#$*! doesn't this work?" phase. Anyway, I always wanted to make a Syntax Highlighter and I finally got around to doing it. Here's the code for anybody who might want to learn more about regular expressions or anybody who just wants yet another syntax highlighter in their toolbox.

#### Why You Should Highlight Code

Because you're a nice person. Because it will make your code more readable. Because your readers will love you. Any more questions?

Without further ado, then, here's the code for Syntax Highlighter V1.0, highlighted and formatted, I might add, using the Syntax Highlighter:

  using System;
  using System.Text;
  using System.Text.RegularExpressions;

  /// <summary>
  /// Converts C# source text into an HTML ordered list (one &lt;li&gt; per
  /// source line) in which comments, string/character literals and keywords
  /// are wrapped in &lt;span&gt; elements carrying a CSS class.
  /// </summary>
  /// <remarks>
  /// String literals are recognised only when their double quotes appear as
  /// the entity <c>&amp;quot;</c>, so the input is presumably HTML-encoded
  /// before it is handed to <see cref="Highlight"/>; this class does no
  /// encoding of its own.
  /// </remarks>
  public class CsharpHighlighter
  {
      private const string ItemOpen = "<li>";
      private const string ItemClose = "</li>";
      private const string Nbsp = "&nbsp;";
      private const string TabAsNbsp = "&nbsp;&nbsp;&nbsp;&nbsp;";

      // A verbatim string ends at the first quote that is not doubled; a
      // backslash has no special meaning inside it, so @"C:\" is complete.
      private const string VerbatimStringPattern =
          @"@&quot;(?:&quot;&quot;|(?!&quot;).)*&quot;";

      // Must be declared before TokenRegex: static initializers run in
      // textual order and the token pattern is built from this list.
      private static readonly string[] Keywords = new string[]
      {
          "abstract", "as", "base", "bool", "boolean", "break", "byte",
          "case", "catch", "char", "checked", "class", "const", "continue",
          "decimal", "default", "delegate", "do", "double", "else", "enum",
          "event", "explicit", "extern", "false", "finally", "fixed", "float",
          "for", "foreach", "get", "goto", "if", "implements", "implicit",
          "in", "instanceof", "int", "interface", "internal", "is", "length",
          "lock", "long", "namespace", "native", "new", "null", "object",
          "operator", "out", "override", "package", "params", "private",
          "protected", "public", "readonly", "ref", "return", "sbyte",
          "sealed", "set", "short", "sizeof", "stackalloc", "static",
          "string", "struct", "super", "switch", "synchronized", "this",
          "threadsafe", "throw", "throws", "true", "try", "typeof", "uint",
          "ulong", "unchecked", "unsafe", "ushort", "using", "virtual",
          "void", "while"
      };

      // The patterns never change, so each Regex is built once and reused.
      private static readonly Regex TokenRegex =
          new Regex(BuildTokenPattern(), RegexOptions.Singleline);

      private static readonly Regex LineRegex =
          new Regex(@"^.*?$", RegexOptions.Multiline);

      private static readonly Regex IndentRegex =
          new Regex(@"<li>[\t ]*", RegexOptions.Singleline);

      private static readonly Regex MultiLineCommentRegex =
          new Regex(@"/\*.*?\*/", RegexOptions.Singleline);

      private static readonly Regex VerbatimStringRegex =
          new Regex(VerbatimStringPattern, RegexOptions.Singleline);

      /// <summary>
      /// Highlights <paramref name="code"/> and returns it as an
      /// <c>&lt;ol class = "code"&gt;</c> element with one list item per line.
      /// </summary>
      /// <param name="code">The source text to highlight.</param>
      /// <returns>The HTML markup for the highlighted listing.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="code"/> is <c>null</c>.
      /// </exception>
      public string Highlight(string code)
      {
          if (code == null)
          {
              throw new ArgumentNullException("code");
          }

          // 1. Wrap every comment, literal and keyword in a classed span.
          code = TokenRegex.Replace(code, new MatchEvaluator(HandleMatch));

          // 2. Turn each source line into a list item.
          code = LineRegex.Replace(code, new MatchEvaluator(HandleLines));

          // 3. Turn leading tabs and spaces into &nbsp;s. This has to happen
          //    before steps 4 and 5 insert a span right after each <li>.
          code = IndentRegex.Replace(code, new MatchEvaluator(HandleTabs));

          // 4. A multi-line comment now straddles several list items; close
          //    and reopen its span at every item boundary.
          code = MultiLineCommentRegex.Replace(code, new MatchEvaluator(HandleMLC));

          // 5. Same treatment for verbatim strings that span several lines.
          code = VerbatimStringRegex.Replace(code, new MatchEvaluator(HandleSTR));

          return "<ol class = \"code\">\n" + code + "</ol>\n";
      }

      /// <summary>
      /// Builds the alternation used in step 1. The position of each
      /// capturing group is significant: <see cref="HandleMatch"/> picks the
      /// CSS class from the index of the group that matched.
      /// </summary>
      private static string BuildTokenPattern()
      {
          StringBuilder patterns = new StringBuilder();

          // Group 1: single-line comment ("//" that is not part of "///")
          patterns.Append(@"(/(?!//)/[^\n]*)|");

          // Group 2: formal documentation comment
          patterns.Append(@"(///[^\n]*)|");

          // Group 3: multi-line comment
          patterns.Append(@"(/\*.*?\*/)|");

          // Group 4: double-quoted string (not preceded by '@')
          patterns.Append(@"((?<!@)&quot;[^\n]*?(?<!\\)&quot;)|");

          // Group 5: verbatim ("hard quotes") string
          patterns.Append("(" + VerbatimStringPattern + ")|");

          // Group 6: single-quoted character literal
          patterns.Append(@"('[^\n]*?(?<!\\)')|");

          // Group 7: keywords
          patterns.Append(GetKeywords());

          return patterns.ToString();
      }

      /// <summary>Wraps a matched token in the span for its category.</summary>
      private string HandleMatch(Match m)
      {
          if (m.Groups[1].Success)
          {
              return Span("slc", m.Value);
          }

          if (m.Groups[2].Success)
          {
              return Span("fdc", m.Value);
          }

          if (m.Groups[3].Success)
          {
              return Span("mlc", m.Value);
          }

          if (m.Groups[4].Success || m.Groups[5].Success || m.Groups[6].Success)
          {
              return Span("str", m.Value);
          }

          if (m.Groups[7].Success)
          {
              return Span("kwd", m.Value);
          }

          // Every alternative has its own group, so this is not expected to
          // be reached; hand the text back untouched rather than dropping it.
          return m.Value;
      }

      /// <summary>Wraps one source line in a list item.</summary>
      private string HandleLines(Match m)
      {
          // Add &nbsp; to empty lines so they show up
          if (m.Value.Trim().Length < 1)
          {
              return ItemOpen + Nbsp + ItemClose;
          }

          // Drop a trailing carriage return so the closing </li> does not
          // end up on a line of its own in the HTML source.
          return ItemOpen + m.Value.TrimEnd('\r', '\n') + ItemClose;
      }

      /// <summary>Re-opens the comment span inside every list item it crosses.</summary>
      private string HandleMLC(Match m)
      {
          return SpreadSpanAcrossItems(m.Value, "mlc");
      }

      /// <summary>Re-opens the string span inside every list item it crosses.</summary>
      private string HandleSTR(Match m)
      {
          return SpreadSpanAcrossItems(m.Value, "str");
      }

      /// <summary>
      /// Replaces the indentation that follows an opening &lt;li&gt; with
      /// non-breaking spaces: four per tab, one per space.
      /// </summary>
      private string HandleTabs(Match m)
      {
          StringBuilder space = new StringBuilder(ItemOpen);

          // Everything after the "<li>" prefix is a tab or a space.
          for (int i = ItemOpen.Length; i < m.Value.Length; i++)
          {
              space.Append(m.Value[i] == '\t' ? TabAsNbsp : Nbsp);
          }

          return space.ToString();
      }

      /// <summary>Returns the keyword alternation, anchored on word boundaries.</summary>
      private static string GetKeywords()
      {
          return @"\b(" + string.Join("|", Keywords) + @")\b";
      }

      private static string Span(string cssClass, string text)
      {
          return "<span class = \"" + cssClass + "\">" + text + "</span>";
      }

      // Closes the span before each </li> and opens a fresh one after each
      // <li> found inside text, so every list item holds a balanced span.
      private static string SpreadSpanAcrossItems(string text, string cssClass)
      {
          StringBuilder value = new StringBuilder(text);

          value.Replace(ItemOpen, ItemOpen + "<span class = \"" + cssClass + "\">");
          value.Replace(ItemClose, "</span>" + ItemClose);

          return value.ToString();
      }
  }

What's Right About This Version of the Syntax Highlighter

  • It uses a list rather than the <pre> tag, which is just plain awesome. I can't tell you how much I hate the <pre> tag. I've become especially miserable with it in recent days. I post a lot of code to this blog, but I really don't want to spend time wrestling with lines that are too long.
  • Each line is numbered, which is also very nice. For one thing, it's easier to tell the readers to "insert [new code] at line 243" than it is to say "insert [new code] after the line in the Highlight function after we make the regex pattern for matching double-quote strings." For another thing, line numbers just make it so much easier to read the code.

Changes to be Made Still

  • Turn the string of &nbsp;s into padding-lefts.
  • Right now, this is a C#-only highlighter.
  • Read the keywords from an XML file.

Look for these changes in Version 2.0!