changeset 40:c71855e241fc

* Add method to clean up a string as a valid XML ID no-open-ticket
author IBBoard <dev@ibboard.co.uk>
date Sat, 22 Aug 2009 10:50:17 +0000
parents ebc01964a918
children d5dcd1c09c28
files Xml/XmlTools.cs
diffstat 1 files changed, 62 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/Xml/XmlTools.cs	Sun Aug 09 11:20:25 2009 +0000
+++ b/Xml/XmlTools.cs	Sat Aug 22 10:50:17 2009 +0000
@@ -4,6 +4,7 @@
 // 
 
 using System;
+using System.Text.RegularExpressions;
 using System.Xml;
 
 namespace IBBoard.Xml
@@ -12,7 +13,10 @@
 	/// Some basic tools for handling XML files and retrieving their values
 	/// </summary>
 	public class XmlTools
-	{			
+	{
+		private static Regex idRegex; // Cache used by GetIdRegex(); stays null until that method first creates the pattern
+		private static Regex multiUnderscoreRegex; // Cache used by GetMultiUnderscoreRegex(); stays null until first created
+		
 		/// <summary>
 		/// Gets the value of an attribute of an element as a boolean. Throws a FormatException if the attribute is not a boolean.
 		/// </summary>
@@ -96,5 +100,62 @@
 			
 			return doubleVal;
 		}
+		
+		/// <summary>
+		/// Gets the shared pattern that matches each run of characters other than ASCII letters, digits, ':', '.', '_' and '-',
+		/// constructing it the first time it is requested.
+		/// </summary>
+		private static Regex GetIdRegex()
+		{
+			// Assign-on-null keeps a single cached instance for later calls
+			return idRegex ?? (idRegex = new Regex("[^a-zA-Z0-9:\\._-]+"));
+		}
+		
+		/// <summary>
+		/// Gets the shared pattern that matches two or more consecutive underscores,
+		/// constructing it the first time it is requested.
+		/// </summary>
+		private static Regex GetMultiUnderscoreRegex()
+		{
+			// Assign-on-null keeps a single cached instance for later calls
+			return multiUnderscoreRegex ?? (multiUnderscoreRegex = new Regex("_{2,}"));
+		}
+		
+		/// <summary>
+		/// Converts a string into an ASCII-only XML ID. Only the ASCII subset of the name characters allowed by the XML spec
+		/// (http://www.w3.org/TR/xml/#NT-NameStartChar) is kept, so accented and other non-ASCII characters are replaced. If the
+		/// result does not start with a valid first character then an underscore is prepended.
+		/// </summary>
+		/// <param name="str">
+		/// The <see cref="System.String"/> to turn into a valid ID
+		/// </param>
+		/// <returns>
+		/// The ID, with each run of invalid characters replaced by an underscore and repeated underscores collapsed into one
+		/// </returns>
+		public static string GetAsciiXmlIdForString(string str)
+		{
+			string replaced = GetIdRegex().Replace(str, "_");
+			string collapsed = GetMultiUnderscoreRegex().Replace(replaced, "_");
+			
+			if (IdStartsWithValidCharacter(collapsed))
+			{
+				return collapsed;
+			}
+			
+			return "_" + collapsed;
+		}
+		
+		/// <summary>
+		/// Checks whether the first character of an ID is an ASCII letter, an underscore or a colon. An empty ID is never valid.
+		/// </summary>
+		private static bool IdStartsWithValidCharacter(string id)
+		{
+			if (id.Length == 0)
+			{
+				return false;
+			}
+			char c = id[0];
+			return c == '_' || c == ':' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+		}
 	}
 }