码迷,mamicode.com
首页 > 编程语言 > 详细

c#中使用python语法的正则表达式.

时间:2015-08-21 07:09:29      阅读:273      评论:0      收藏:0      [点我收藏+]

标签:python   unity3d   正则表达式   源码   开源项目   

转载请注明出处:http://blog.csdn.net/zxsean


因为项目需求,现在需要在c#中使用python的正则,最开始采用的方法是ironpython.

但是在ios上面编译无法通过.好在ironpython是一个开源项目,于是拿到代码开始研究.


因为我需要的功能很简单,所以最后的代码也只是判断传入的字符串是否匹配传入的python正则表达式.


解析部分的代码直接使用了ironpython的源码:

/* ****************************************************************************
 *
 * Copyright (c) Microsoft Corporation. 
 *
 * This source code is subject to terms and conditions of the Microsoft Public
 * License. A  copy of the license can be found in the License.html file at the
 * root of this distribution. If  you cannot locate the  Microsoft Public
 * License, please send an email to  dlr@microsoft.com. By using this source
 * code in any fashion, you are agreeing to be bound by the terms of the 
 * Microsoft Public License.
 *
 * You must not remove this notice, or any other, from this software.
 *
 * ***************************************************************************/

using System;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Translates Python-syntax regular expressions into .NET syntax and tests
/// strings against them.
/// By ZeaLotSean
/// </summary>
public static class PythonRegex
{
    #region CONSTANTS

    // Long-form flag values; they mirror the numeric values of Python's re module.
    // They are kept as mutable boxed ints for backward compatibility with existing
    // callers; FlagsToOption unboxes them with an (int) cast, so they must stay ints.
    public static object IGNORECASE = 0x02;
    public static object LOCALE = 0x04;
    public static object MULTILINE = 0x08;
    public static object DOTALL = 0x10;
    public static object UNICODE = 0x20;
    public static object VERBOSE = 0x40;

    #endregion

    /// <summary>
    /// Builds a pattern object with no flags and RegexOptions.Compiled enabled.
    /// </summary>
    /// <param name="_pattern">Regular expression in Python syntax.</param>
    /// <returns>The translated pattern object.</returns>
    /// <exception cref="ArgumentNullException">_pattern is null.</exception>
    /// <exception cref="ArgumentException">The pattern is malformed or uses an unsupported construct.</exception>
    public static Python_Pattern Compile(string _pattern)
    {
        return new Python_Pattern(_pattern, 0, true);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the Python-syntax
    /// pattern, using no flags.
    /// </summary>
    /// <param name="_pattern">Regular expression in Python syntax.</param>
    /// <param name="_str">Text to search.</param>
    /// <returns>true when a match is found anywhere in the text.</returns>
    public static bool isMatched(string _pattern, string _str)
    {
        return isMatched(_pattern, _str, 0);
    }

    /// <summary>
    /// Tests whether <paramref name="_str"/> contains a match for the Python-syntax
    /// pattern.
    /// </summary>
    /// <param name="_pattern">Regular expression in Python syntax.</param>
    /// <param name="_str">Text to search.</param>
    /// <param name="flags">Bitwise combination of the flag constants of this class.</param>
    /// <returns>true when a match is found anywhere in the text.</returns>
    public static bool isMatched(string _pattern, string _str, int flags)
    {
        return new Python_Pattern(_pattern, flags).isMatched(_str);
    }

    /// <summary>
    /// Compiled reg-ex pattern
    /// </summary>
    public class Python_Pattern
    {
        internal Regex m_re;

        internal ParsedRegex m_pre;

        /// <summary>Creates a pattern with no flags.</summary>
        /// <param name="pattern">Regular expression in Python syntax.</param>
        public Python_Pattern(string pattern)
            : this(pattern, 0)
        {
        }

        /// <summary>Creates a pattern without RegexOptions.Compiled.</summary>
        /// <param name="pattern">Regular expression in Python syntax.</param>
        /// <param name="flags">Bitwise combination of the PythonRegex flag constants.</param>
        public Python_Pattern(string pattern, int flags)
            : this(pattern, flags, false)
        {
        }

        /// <summary>
        /// Translates the Python pattern to .NET syntax and builds the underlying Regex.
        /// </summary>
        /// <param name="pattern">Regular expression in Python syntax.</param>
        /// <param name="flags">Bitwise combination of the PythonRegex flag constants.</param>
        /// <param name="compiled">true to add RegexOptions.Compiled.</param>
        /// <exception cref="ArgumentNullException">pattern is null.</exception>
        /// <exception cref="ArgumentException">The pattern is malformed or uses an unsupported construct.</exception>
        public Python_Pattern(string pattern, int flags, bool compiled)
        {
            m_pre = PreParseRegex(pattern);

            RegexOptions opts = FlagsToOption(flags);
            if (compiled)
            {
                opts |= RegexOptions.Compiled;
            }

            // Only the explicit flags are applied here; the options collected from
            // inline flags in m_pre.Options are not passed to the Regex.
            this.m_re = new Regex(m_pre.Pattern, opts);
        }

        /// <summary>
        /// Tests whether the given text contains a match for this pattern.
        /// The underlying call is Regex.Match, so the match may start anywhere.
        /// </summary>
        /// <param name="_str">Text to search.</param>
        /// <returns>true when a match is found.</returns>
        public bool isMatched(string _str)
        {
            return m_re.Match(_str).Success;
        }

        /// <summary>The pattern exactly as the caller supplied it (Python syntax).</summary>
        public string Pattern
        {
            get
            {
                return m_pre.UserPattern;
            }
        }
    }

    /// <summary>
    /// Maps the integer flag bits of this class onto RegexOptions.
    /// UNICODE has no mapping here.
    /// </summary>
    private static RegexOptions FlagsToOption(int flags)
    {
        RegexOptions opts = RegexOptions.None;
        if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
        if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
        // NOTE(review): opts never contains CultureInvariant at this point, so this
        // line has no effect; it is kept as-is to preserve matching behaviour.
        if (((flags & (int)LOCALE)) == 0) opts &= (~RegexOptions.CultureInvariant);
        if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
        if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;

        return opts;
    }

    /// <summary>
    /// Result of pre-parsing: the caller's original pattern, the translated .NET
    /// pattern, and the options gathered from inline flags.
    /// </summary>
    internal class ParsedRegex
    {
        public ParsedRegex(string pattern)
        {
            this.UserPattern = pattern;
        }

        public string UserPattern;
        public string Pattern;
        public RegexOptions Options = RegexOptions.CultureInvariant;
    }

    /// <summary>
    /// Preparses a regular expression text returning a ParsedRegex class
    /// that can be used for further regular expressions.
    /// Takes a pattern in Python syntax and produces one usable by .NET.
    /// </summary>
    /// <exception cref="ArgumentNullException">pattern is null.</exception>
    /// <exception cref="ArgumentException">
    /// The pattern ends inside a group construct or uses an unsupported extension.
    /// </exception>
    private static ParsedRegex PreParseRegex(string pattern)
    {
        if (pattern == null)
        {
            throw new ArgumentNullException("pattern");
        }

        ParsedRegex res = new ParsedRegex(pattern);
        res.Pattern = NormalizeEscapes(TranslateGroups(pattern, res));
        return res;
    }

    /// <summary>
    /// Rewrites Python-only group syntax: (?P&lt;name&gt;...) becomes (?&lt;name&gt;...),
    /// (?P=name) becomes \k&lt;name&gt;, and inline flag letters are recorded in res.Options.
    /// </summary>
    private static string TranslateGroups(string pattern, ParsedRegex res)
    {
        int cur = 0;
        bool containsNamedGroup = false;

        for (; ; )
        {
            int nameIndex = pattern.IndexOf('(', cur);

            // No more parens, or a paren as the very last character: nothing to rewrite.
            if (nameIndex == -1 || nameIndex == pattern.Length - 1)
            {
                break;
            }

            if (IsEscaped(pattern, nameIndex))
            {
                // A literal "\(" is not a group opener; resume right after it.
                cur = nameIndex + 1;
                continue;
            }

            switch (pattern[++nameIndex])
            {
                case '?':
                    // extension syntax
                    if (nameIndex == pattern.Length - 1)
                    {
                        throw new ArgumentException("unexpected end of regex", "pattern");
                    }

                    switch (pattern[++nameIndex])
                    {
                        case 'P':
                            //  named regex, .NET doesn't expect the P so we'll remove it;
                            //  also, once we see a named group i.e. ?P then we need to start artificially
                            //  naming all unnamed groups from then on---this is to get around the fact that
                            //  the CLR RegEx support orders all the unnamed groups before all the named
                            //  groups, even if the named groups are before the unnamed ones in the pattern;
                            //  the artificial naming preserves the order of the groups and thus the order of
                            //  the matches
                            if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=')
                            {
                                // (?P=name): match whatever the named group matched earlier.
                                int groupStart = nameIndex - 2;
                                int closeIndex = pattern.IndexOf(')', nameIndex + 2);
                                if (closeIndex == -1)
                                {
                                    throw new ArgumentException("unexpected end of regex", "pattern");
                                }

                                string groupName = pattern.Substring(nameIndex + 2, closeIndex - nameIndex - 2);
                                string backReference = "\\k<" + groupName + ">";
                                pattern = pattern.Substring(0, groupStart) + backReference + pattern.Substring(closeIndex + 1);

                                // Continue scanning after the rewritten back-reference.
                                nameIndex = groupStart + backReference.Length;
                            }
                            else
                            {
                                containsNamedGroup = true;
                                pattern = pattern.Remove(nameIndex, 1);
                            }
                            break;
                        case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                        case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;
                        case 'm': res.Options |= RegexOptions.Multiline; break;
                        case 's': res.Options |= RegexOptions.Singleline; break;
                        case 'u': break;
                        case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                        case ':': break; // non-capturing
                        case '=': break; // look ahead assertion
                        case '<': break; // positive look behind assertion
                        case '!': break; // negative look ahead assertion
                        case '#': break; // inline comment
                        case '(':  // yes/no if group exists, we don't support this
                        default:
                            throw new ArgumentException(
                                "Unrecognized extension (?" + pattern[nameIndex], "pattern");
                    }
                    break;
                default:
                    // just another group
                    if (containsNamedGroup)
                    {
                        // need to name this unnamed group
                        pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                    }
                    break;
            }

            cur = nameIndex;
        }

        return pattern;
    }

    /// <summary>
    /// Returns true when the character at <paramref name="index"/> is preceded by an
    /// odd number of consecutive backslashes, i.e. it is itself escaped.
    /// </summary>
    private static bool IsEscaped(string pattern, int index)
    {
        int backslashCount = 0;
        for (int i = index - 1; i >= 0 && pattern[i] == '\\'; i--)
        {
            backslashCount++;
        }

        return (backslashCount & 0x01) != 0;
    }

    /// <summary>
    /// Drops the backslash in front of word characters that have no escape meaning,
    /// leaving every other escape sequence untouched.
    /// </summary>
    private static string NormalizeEscapes(string pattern)
    {
        int cur = 0;
        while (cur < pattern.Length)
        {
            int slashIndex = pattern.IndexOf('\\', cur);
            if (slashIndex == -1 || slashIndex == pattern.Length - 1)
            {
                break;
            }

            if (ShouldUnescape(pattern[slashIndex + 1]))
            {
                pattern = pattern.Remove(slashIndex, 1);
                // The unescaped character now sits at slashIndex; step over it.
                cur = slashIndex + 1;
            }
            else
            {
                // Step over the backslash AND the character it escapes, so the second
                // half of "\\" is never mistaken for the start of a new escape.
                cur = slashIndex + 2;
            }
        }

        return pattern;
    }

    /// <summary>
    /// Decides whether a backslash in front of <paramref name="escaped"/> should be removed.
    /// </summary>
    private static bool ShouldUnescape(char escaped)
    {
        switch (escaped)
        {
            case 'x':
            case 'u':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case 'c':
            case 's':
            case 'W':
            case 'w':
            case 'p':
            case 'P':
            case 'S':
            case 'd':
            case 'D':
            case 'Z':
            case 'A': // start-of-string anchor
            case 'B': // non-word-boundary
            case 'k': // named back-reference produced by TranslateGroups
                // known escape sequences, leave escaped.
                return false;
            case '\\':
                // escaping a backslash
                return false;
        }

        switch (Char.GetUnicodeCategory(escaped))
        {
            // recognized word characters, always unescape.
            case System.Globalization.UnicodeCategory.ModifierLetter:
            case System.Globalization.UnicodeCategory.LowercaseLetter:
            case System.Globalization.UnicodeCategory.UppercaseLetter:
            case System.Globalization.UnicodeCategory.TitlecaseLetter:
            case System.Globalization.UnicodeCategory.OtherLetter:
            case System.Globalization.UnicodeCategory.LetterNumber:
            case System.Globalization.UnicodeCategory.OtherNumber:
            case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                return true;
            default:
                // Includes decimal digits: '\1', '\2' etc. are references to groups
                // and must stay escaped.
                return false;
        }
    }

    private static readonly Random s_random = new Random();
    private static readonly object s_randomLock = new object();

    /// <summary>
    /// Returns a random numeric suffix used to give unnamed groups an artificial name.
    /// </summary>
    private static string GetRandomString()
    {
        int value;
        // Random is not safe for concurrent use, and patterns may be built on any thread.
        lock (s_randomLock)
        {
            value = s_random.Next(Int32.MaxValue / 2, Int32.MaxValue);
        }

        return value.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }
}


版权声明:本文为博主原创文章,未经博主允许不得转载。

c#中使用python语法的正则表达式.

标签:python   unity3d   正则表达式   源码   开源项目   

原文地址:http://blog.csdn.net/zxsean/article/details/47824725

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!