• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>
            隨筆-341  評論-2670  文章-0  trackbacks-0
                DFA與捕獲和預查結合起來的話很麻煩,不能用一張表來迭代,而是要用回溯,然后在回溯的時候記下狀態(tài)。至此正則表達式的所有算法都完成了,接下來是詳細介紹。

                每一個回溯的記錄都需要一張帶有所有捕獲軌跡的表(元素為堆棧的堆棧),給每一個記錄集一張表顯然很浪費,因此我們只用一張表來記錄。但是我們對表進行刪除項目的時候,又可能會影響到上一個回溯的數(shù)據(jù),因此我們要開發(fā)出一個新的堆棧,可以push,可以pop,而且pop之后再push不會破壞原有的東西,一直到外邊顯示指定跳到前一個狀態(tài)為止:
             1         template<typename T, typename K>
             2         void Push(List<T, K>& elements, int& available, int& count, const T& element)
             3         {
             4             if(elements.Count()==count)
             5             {
             6                 elements.Add(element);
             7             }
             8             else
             9             {
            10                 elements[count]=element;
            11             }
            12             T& current=elements[count];
            13             current.previous=available;
            14             available=count++;
            15         }
            16 
            17         template<typename T, typename K>
            18         T Pop(List<T, K>& elements, int& available, int& count)
            19         {
            20             T& current=elements[available];
            21             available=current.previous;
            22             return current;
            23         }

                于是我們就可以對可以回溯的列表進行保存了。一開始available=-1,count=0。每次push的時候改變available和count,但是對于列表來說,0到count-1的東西都是安全的。pop的時候改變available,但是count保留不變,這個時候elements[count-1]可以通過elements[count-1].available(而不是參數(shù)的available)追溯到上一個項。這樣就把所有存在的堆棧都壓入到同一個列表里面去了,而且不同的堆棧可以通過不同的count訪問,一直到比較小的count決定push東西了,大的count對應的堆棧才會受到破壞。于是我們就用一個列表和一個count表組成了一個元素是堆棧的堆棧。

                知道怎么操作之后下面的匹配代碼就很好理解了:
              1 #include "RegexRich.h"
              2 
              3 namespace vl
              4 {
              5     namespace regex_internal
              6     {
              7 
              8 /***********************************************************************
              9 回溯輔助數(shù)據(jù)結構
             10 ***********************************************************************/
             11 
             12         class SaverBase
             13         {
             14         public:
             15             bool                available;
             16             int                    previous;
             17         };
             18 
             19         class StateSaver
             20         {
             21         public:
             22             enum StateStoreType
             23             {
             24                 Positive,
             25                 Negative,
             26                 Other
             27             };
             28 
             29             const wchar_t*        reading;                    //當前字符串位置
             30             State*                currentState;                //當前狀態(tài)
             31             int                    minTransition;                //最小可用轉換
             32             int                    captureCount;                //有效capture數(shù)量
             33             int                    stateSaverCount;            //有效回溯狀態(tài)數(shù)量
             34             int                    extensionSaverAvailable;    //有效未封閉擴展功能數(shù)量
             35             int                    extensionSaverCount;        //所有未封閉擴展功能數(shù)量
             36             StateStoreType        storeType;                    //保存狀態(tài)的原因
             37 
             38             bool operator==(const StateSaver& saver)
             39             {
             40                 return
             41                     reading==saver.reading &&
             42                     currentState==saver.currentState &&
             43                     minTransition==saver.minTransition &&
             44                     captureCount==saver.captureCount;
             45             }
             46         };
             47 
             48         class ExtensionSaver : public SaverBase
             49         {
             50         public:
             51             int                    captureListIndex;
             52             Transition*            transition;
             53             const wchar_t*        reading;
             54 
             55             bool operator==(const ExtensionSaver& saver)
             56             {
             57                 return
             58                     captureListIndex==saver.captureListIndex &&
             59                     transition==saver.transition &&
             60                     reading==saver.reading;
             61             }
             62         };
             63 
             64         template<typename T, typename K>
             65         void Push(List<T, K>& elements, int& available, int& count, const T& element)
             66         {
             67             if(elements.Count()==count)
             68             {
             69                 elements.Add(element);
             70             }
             71             else
             72             {
             73                 elements[count]=element;
             74             }
             75             T& current=elements[count];
             76             current.previous=available;
             77             available=count++;
             78         }
             79 
             80         template<typename T, typename K>
             81         T Pop(List<T, K>& elements, int& available, int& count)
             82         {
             83             T& current=elements[available];
             84             available=current.previous;
             85             return current;
             86         }
             87 
             88         template<typename T, typename K>
             89         void PushNonSaver(List<T, K>& elements, int& count, const T& element)
             90         {
             91             if(elements.Count()==count)
             92             {
             93                 elements.Add(element);
             94             }
             95             else
             96             {
             97                 elements[count]=element;
             98             }
             99             count++;
            100         }
            101 
            102         template<typename T, typename K>
            103         T PopNonSaver(List<T, K>& elements, int& count)
            104         {
            105             return elements[--count];
            106         }
            107     }
            108 
            109     template<>
            110     struct POD<regex_internal::StateSaver>
            111     {
            112         static const bool Result=true;
            113     };
            114 
            115     template<>
            116     struct POD<regex_internal::ExtensionSaver>
            117     {
            118         static const bool Result=true;
            119     };
            120 
            121     namespace regex_internal
            122     {
            123 /***********************************************************************
            124 CaptureRecord
            125 ***********************************************************************/
            126 
            127         bool CaptureRecord::operator==(const CaptureRecord& record)
            128         {
            129             return capture==record.capture && start==record.start && length==record.length;
            130         }
            131 
            132 /***********************************************************************
            133 PureInterpretor
            134 ***********************************************************************/
            135 
            136         RichInterpretor::RichInterpretor(Automaton::Ref _dfa)
            137             :dfa(_dfa)
            138         {
            139             datas=new UserData[dfa->states.Count()];
            140 
            141             for(int i=0;i<dfa->states.Count();i++)
            142             {
            143                 State* state=dfa->states[i].Obj();
            144                 int charEdges=0;
            145                 int nonCharEdges=0;
            146                 bool mustSave=false;
            147                 for(int j=0;j<state->transitions.Count();j++)
            148                 {
            149                     if(state->transitions[j]->type==Transition::Chars)
            150                     {
            151                         charEdges++;
            152                     }
            153                     else
            154                     {
            155                         if(state->transitions[j]->type==Transition::Negative ||
            156                            state->transitions[j]->type==Transition::Positive)
            157                         {
            158                             mustSave=true;
            159                         }
            160                         nonCharEdges++;
            161                     }
            162                 }
            163                 datas[i].NeedKeepState=mustSave || nonCharEdges>1 || (nonCharEdges!=0 && charEdges!=0);
            164                 state->userData=&datas[i];
            165             }
            166         }
            167 
            168         RichInterpretor::~RichInterpretor()
            169         {
            170             delete[] datas;
            171         }
            172 
            173         bool RichInterpretor::MatchHead(const wchar_t* input, const wchar_t* start, Result& result)
            174         {
            175             List<StateSaver> stateSavers;
            176             List<ExtensionSaver> extensionSavers;
            177 
            178             StateSaver currentState;
            179             currentState.captureCount=0;
            180             currentState.currentState=dfa->startState;
            181             currentState.extensionSaverAvailable=-1;
            182             currentState.extensionSaverCount=0;
            183             currentState.minTransition=0;
            184             currentState.reading=input;
            185             currentState.stateSaverCount=0;
            186             currentState.storeType=StateSaver::Other;
            187 
            188             while(!currentState.currentState->finalState)
            189             {
            190                 bool found=false;
            191                 StateSaver oldState=currentState;
            192                 //開始遍歷轉換
            193                 for(int i=currentState.minTransition;i<currentState.currentState->transitions.Count();i++)
            194                 {
            195                     Transition* transition=currentState.currentState->transitions[i];
            196                     switch(transition->type)
            197                     {
            198                     case Transition::Chars:
            199                         {
            200                             CharRange range=transition->range;
            201                             found=
            202                                 range.begin<=*currentState.reading && 
            203                                 range.end>=*currentState.reading;
            204                             if(found)
            205                             {
            206                                 currentState.reading++;
            207                             }
            208                         }
            209                         break;
            210                     case Transition::BeginString:
            211                         {
            212                             found=currentState.reading==start;
            213                         }
            214                         break;
            215                     case Transition::EndString:
            216                         {
            217                             found=*currentState.reading==L'\0';
            218                         }
            219                         break;
            220                     case Transition::Nop:
            221                         {
            222                             found=true;
            223                         }
            224                         break;
            225                     case Transition::Capture:
            226                         {
            227                             ExtensionSaver saver;
            228                             saver.captureListIndex=currentState.captureCount;
            229                             saver.reading=currentState.reading;
            230                             saver.transition=transition;
            231                             Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);
            232 
            233                             CaptureRecord capture;
            234                             capture.capture=transition->capture;
            235                             capture.start=currentState.reading-input;
            236                             capture.length=-1;
            237                             PushNonSaver(result.captures, currentState.captureCount, capture);
            238 
            239                             found=true;
            240                         }
            241                         break;
            242                     case Transition::Match:
            243                         {
            244                             int index=0;
            245                             for(int j=0;j<currentState.captureCount;j++)
            246                             {
            247                                 CaptureRecord& capture=result.captures[j];
            248                                 if(capture.capture==transition->capture)
            249                                 {
            250                                     if(capture.length!=-1 && (transition->index==-1 || transition->index==index))
            251                                     {
            252                                         if(wcsncmp(input+capture.start, currentState.reading, capture.length)==0)
            253                                         {
            254                                             currentState.reading+=capture.length;
            255                                             found=true;
            256                                             break;
            257                                         }
            258                                     }
            259                                     if(transition->index!=-1 && index==transition->index)
            260                                     {
            261                                         break;
            262                                     }
            263                                     else
            264                                     {
            265                                         index++;
            266                                     }
            267                                 }
            268                             }
            269                         }
            270                         break;
            271                     case Transition::Positive:
            272                         {
            273                             ExtensionSaver saver;
            274                             saver.captureListIndex=-1;
            275                             saver.reading=currentState.reading;
            276                             saver.transition=transition;
            277                             Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);
            278                             //Positive的oldState一定會被push
            279                             oldState.storeType=StateSaver::Positive;
            280                             found=true;
            281                         }
            282                         break;
            283                     case Transition::Negative:
            284                         {
            285                             ExtensionSaver saver;
            286                             saver.captureListIndex=-1;
            287                             saver.reading=currentState.reading;
            288                             saver.transition=transition;
            289                             Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);
            290                             //Negative的oldState一定會被push
            291                             oldState.storeType=StateSaver::Negative;
            292                             found=true;
            293                         }
            294                         break;
            295                     case Transition::NegativeFail:
            296                         {
            297                             //只有在回溯的時候NegativeFail才會被考慮
            298                         }
            299                         break;
            300                     case Transition::End:
            301                         {
            302                             ExtensionSaver saver=Pop(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount);
            303                             switch(saver.transition->type)
            304                             {
            305                             case Transition::Capture:
            306                                 {
            307                                     CaptureRecord& capture=result.captures[saver.captureListIndex];
            308                                     capture.length=(currentState.reading-input)-capture.start;
            309                                     found=true;
            310                                 }
            311                                 break;
            312                             case Transition::Positive:
            313                                 for(int j=currentState.stateSaverCount-1;j>=0;j--)
            314                                 {
            315                                     StateSaver& saver=stateSavers[j];
            316                                     if(saver.storeType==StateSaver::Positive)
            317                                     {
            318                                         oldState.reading=saver.reading;
            319                                         oldState.stateSaverCount=j;
            320                                         currentState.reading=saver.reading;
            321                                         currentState.stateSaverCount=j;
            322                                         break;
            323                                     }
            324                                 }
            325                                 found=true;
            326                                 break;
            327                             case Transition::Negative:
            328                                 for(int j=currentState.stateSaverCount-1;j>=0;j--)
            329                                 {
            330                                     StateSaver& saver=stateSavers[j];
            331                                     if(saver.storeType==StateSaver::Negative)
            332                                     {
            333                                         oldState=saver;
            334                                         oldState.storeType=StateSaver::Other;
            335                                         currentState=saver;
            336                                         currentState.storeType=StateSaver::Other;
            337                                         i=currentState.minTransition-1;
            338                                         break;
            339                                     }
            340                                 }
            341                                 break;
            342                             }
            343                         }
            344                         break;
            345                     }
            346                     //尋找成功,在必要的時候保存當前的回溯狀態(tài)
            347                     if(found)
            348                     {
            349                         UserData* data=(UserData*)currentState.currentState->userData;
            350                         if(data->NeedKeepState)
            351                         {
            352                             oldState.minTransition=i+1;
            353                             PushNonSaver(stateSavers, currentState.stateSaverCount, oldState);
            354                         }
            355                         currentState.currentState=transition->target;
            356                         currentState.minTransition=0;
            357                         break;
            358                     }
            359                 }
            360                 if(!found)
            361                 {
            362                     //存在回溯記錄則回溯
            363                     if(currentState.stateSaverCount)
            364                     {
            365                         //恢復Negative失敗狀態(tài)的時候要移動到NegativeFail后面
            366                         currentState=PopNonSaver(stateSavers, currentState.stateSaverCount);
            367                         //minTransition總是被+1后保存,因此直接-1總是有效值
            368                         if(currentState.currentState->transitions[currentState.minTransition-1]->type==Transition::Negative)
            369                         {
            370                             //尋找NegativeFail
            371                             for(int i=0;i<currentState.currentState->transitions.Count();i++)
            372                             {
            373                                 Transition* transition=currentState.currentState->transitions[i];
            374                                 if(transition->type==Transition::NegativeFail)
            375                                 {
            376                                     //將當前狀態(tài)移動到NegativeFail后面
            377                                     currentState.currentState=transition->target;
            378                                     currentState.minTransition=0;
            379                                     currentState.storeType=StateSaver::Other;
            380                                     break;
            381                                 }
            382                             }
            383                         }
            384                     }
            385                     else
            386                     {
            387                         break;
            388                     }
            389                 }
            390             }
            391 
            392             //判斷是否成功并且處理返回結果
            393             if(currentState.currentState->finalState)
            394             {
            395                 result.start=0;
            396                 result.length=currentState.reading-input;
            397                 for(int i=result.captures.Count()-1;i>=currentState.captureCount;i++)
            398                 {
            399                     result.captures.RemoveAt(i);
            400                 }
            401                 return true;
            402             }
            403             else
            404             {
            405                 result.captures.Clear();
            406                 return false;
            407             }
            408         }
            409 
            410         bool RichInterpretor::Match(const wchar_t* input, const wchar_t* start, Result& result)
            411         {
            412             const wchar_t* read=input;
            413             while(*read)
            414             {
            415                 if(MatchHead(read, start, result))
            416                 {
            417                     result.start=read-input;
            418                     for(int i=0;i<result.captures.Count();i++)
            419                     {
            420                         result.captures[i].start+=result.start;
            421                     }
            422                     return true;
            423                 }
            424                 read++;
            425             }
            426             return false;
            427         }
            428 
            429         const IReadonlyList<WString>& RichInterpretor::CaptureNames()
            430         {
            431             return dfa->captureNames.Wrap();
            432         }
            433     }
            434 }

                于是帶有各種擴展功能的匹配就完成了。之前還用同樣的前端開發(fā)了一個不帶擴展功能的純匹配的正則表達式引擎。因為過于簡單就不列出來了。這些算法的詳細介紹可以看這里

                下面的工作就是寫更多的單元測試用例,最后將這兩個不同的匹配算法封裝在一起,智能地在正確的情況下選擇更快的算法。這樣正則表達式引擎的效率將會大大提高。
            posted on 2009-11-14 19:13 陳梓瀚(vczh) 閱讀(2469) 評論(1)  編輯 收藏 引用 所屬分類: VL++3.0開發(fā)紀事

            評論:
            # re: Vczh Library++3.0之正則表達式引擎(DFA與捕獲、預查結合的匹配)[未登錄] 2009-11-19 01:52 | megax
            有沒有測試過你的正則的效率?和delex比怎么樣?  回復  更多評論
              
            9191精品国产免费久久| 色播久久人人爽人人爽人人片aV| 欧美精品久久久久久久自慰| 久久久久亚洲AV无码观看| 婷婷久久久亚洲欧洲日产国码AV| 国产精品美女久久久久网| 国产午夜精品理论片久久 | 亚洲精品乱码久久久久久自慰| 久久久久久久亚洲Av无码| 亚洲国产精品婷婷久久| 久久久久99这里有精品10| 免费观看成人久久网免费观看| 久久国产成人午夜aⅴ影院| 亚洲午夜久久久影院伊人| 韩国三级大全久久网站| 免费精品久久天干天干| 亚洲国产成人久久综合碰碰动漫3d| 久久精品极品盛宴观看| 久久99国产亚洲高清观看首页| 中文成人无码精品久久久不卡 | 久久国产精品无码HDAV| 国产精品成人无码久久久久久| 免费无码国产欧美久久18| 国产日韩久久久精品影院首页| 久久精品亚洲一区二区三区浴池| 久久精品国产99久久丝袜| 狠狠色丁香婷婷综合久久来| 中文成人无码精品久久久不卡| 狠狠色综合久久久久尤物| 久久久久国产精品熟女影院| 一本大道久久香蕉成人网| 国产激情久久久久影院小草| 国产精品美女久久久| 潮喷大喷水系列无码久久精品| 日本精品久久久久久久久免费| 91精品国产91久久久久福利| 亚洲熟妇无码另类久久久| 久久婷婷五月综合97色直播| 久久这里都是精品| 国产精品久久久香蕉| 欧美精品国产综合久久|