Mercurial > repos > IBBoard
changeset 40:c71855e241fc
* Add method to clean up a string as a valid XML ID
no-open-ticket
author | IBBoard <dev@ibboard.co.uk> |
---|---|
date | Sat, 22 Aug 2009 10:50:17 +0000 |
parents | ebc01964a918 |
children | d5dcd1c09c28 |
files | Xml/XmlTools.cs |
diffstat | 1 files changed, 62 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/Xml/XmlTools.cs Sun Aug 09 11:20:25 2009 +0000 +++ b/Xml/XmlTools.cs Sat Aug 22 10:50:17 2009 +0000 @@ -4,6 +4,7 @@ // using System; +using System.Text.RegularExpressions; using System.Xml; namespace IBBoard.Xml @@ -12,7 +13,10 @@ /// Some basic tools for handling XML files and retrieving their values /// </summary> public class XmlTools - { + { + private static Regex idRegex; + private static Regex multiUnderscoreRegex; + /// <summary> /// Gets the value of an attribute of an element as a boolean. Throws a FormatException if the attribute is not a boolean. /// </summary> @@ -96,5 +100,62 @@ return doubleVal; } + + private static Regex GetIdRegex() + { + if (idRegex == null) + { + idRegex = new Regex("[^a-zA-Z0-9:\\._-]+"); + } + + return idRegex; + } + + private static Regex GetMultiUnderscoreRegex() + { + if (multiUnderscoreRegex == null) + { + multiUnderscoreRegex = new Regex("_{2,}"); + } + + return multiUnderscoreRegex; + } + + /// <summary> + /// Gets a valid ASCII-only XML ID for a given string. Each series of characters other than a-z, A-Z, 0-9, ':', '.', '_' and '-' is replaced with a single underscore, and runs of + /// underscores are then collapsed to one. The characters kept are those allowed by the XML spec (http://www.w3.org/TR/xml/#NT-NameStartChar) that are plain ASCII. If the resulting ID + /// is empty or does not start with an ASCII letter, an underscore or a colon then it will be prepended with an underscore. 
+ /// </summary> + /// <param name="str"> + /// The <see cref="System.String"/> to turn into a valid ID + /// </param> + /// <returns> + /// The valid XML ID, with each series of invalid characters replaced by a single underscore + /// </returns> + public static string GetAsciiXmlIdForString(string str) + { + string id = GetIdRegex().Replace(str, "_"); + id = GetMultiUnderscoreRegex().Replace(id, "_"); + + if (!IdStartsWithValidCharacter(id)) + { + id = "_" + id; + } + + return id; + } + + private static bool IdStartsWithValidCharacter(string id) + { + bool valid = false; + + if (id.Length > 0) + { + char firstChar = id[0]; + valid = ('A' <= firstChar && firstChar <= 'Z') || ('a' <= firstChar && firstChar <= 'z') || firstChar == '_' || firstChar == ':'; + } + + return valid; + } } }