码迷,mamicode.com
首页 > 其他好文 > 详细

opencv源码分析:cvCreateTreeCascadeClassifier

时间:2015-07-20 19:41:48      阅读:642      评论:0      收藏:0      [点我收藏+]

标签:cvcreatetreecascadec   级联分类器   adaboost   opencv源码分析   cvhaartraining.cpp   

我使用的是opencv2.4.9,cvCreateTreeCascadeClassifier的源码在......opencv\sources\apps\haartraining\cvhaartraining.cpp之中,这中间用到很多结构体,指针函数,宏等各方面的内容,请参考我博客中的文章opencv源码详细解读目录。如需转载请注明本博网址http://blog.csdn.net/ding977921830?viewmode=contents。具体内容如下:


/*
 * cvCreateCascadeClassifier
 *创建级联分类器
 * dirname          -将创建目录分类器的目录名
 *   该目录名必须存在,其包含的子目录为 0, 1, 2, ... (nstages-1).
 * vecfilename      - .vec文件,它包括包含样本图像的文件名
 * bgfilename       - 背景描述文件名
 * bg_vecfile       - 如果bgfilename 代表一个包含离散负样本文件,那么为true
 * npos             - 在每一个强分类器训练所需要的正样本数
 * nneg             - 在每一个强分类器训练所需要的负样本数
 * nstages          - 强分类器的层数
 * numprecalculated - 预计算的特征数量,每一个预计算的特征需要的内存大小为: (number_of_samples*(sizeof( float ) + sizeof( short ))) bytes 
 * numsplits        -每个弱分类器的分支数 
 *   1 - stumps, 2 and more - trees.
 * minhitrate       - 每一级的最小击中率
 * maxfalsealarm    - 每一级的最大虚警率
 * weightfraction   - 剔除小样本的权重
 * mode             - 0 - BASIC = Viola
 *                    1 - CORE  = All upright
 *                    2 - ALL   = All features
 * symmetric        - 非0表示垂直对称
 * equalweights     - 非0表示所有样本的初始化权重相等
 * winwidth         - 样本的宽
 * winheight        - 样本的高
 * boosttype        - 应用的提升算法类型
 *   0 - Discrete AdaBoost
 *   1 - Real AdaBoost
 *   2 - LogitBoost
 *   3 - Gentle AdaBoost
 * stumperror       - 如果是 Discrete AdaBoost算法时使用的树桩错误率
 *   0 - 错分不纯度
 *   1 - 基尼不纯度
 *   2 - 熵不纯度
 */
void cvCreateTreeCascadeClassifier( const char* dirname,               
                                    const char* vecfilename,          
                                    const char* bgfilename,        
                                    int npos, int nneg, int nstages,
                                    int numprecalculated,
                                    int numsplits,
                                    float minhitrate, float maxfalsealarm,
                                    float weightfraction,
                                    int mode, int symmetric,
                                    int equalweights,
                                    int winwidth, int winheight,
                                    int boosttype, int stumperror,
                                    int maxtreesplits, int minpos, bool bg_vecfile )
{
    CvTreeCascadeClassifier* tcc = NULL;
    CvIntHaarFeatures* haar_features = NULL;
    CvHaarTrainingData* training_data = NULL;
    CvMat* vals = NULL;
    CvMat* cluster_idx = NULL;
    CvMat* idx = NULL;
    CvMat* features_idx = NULL;

    CV_FUNCNAME( "cvCreateTreeCascadeClassifier" );

    __BEGIN__;

    int i, k;
    CvTreeCascadeNode* leaves;
    int best_num, cur_num;
    CvSize winsize;
    char stage_name[PATH_MAX];
    char buf[PATH_MAX];
    char* suffix;
    int total_splits;

    int poscount;
    int negcount;
    int consumed;
    double false_alarm;
    double proctime;

    int nleaves;
    double required_leaf_fa_rate;
    float neg_ratio;

    int max_clusters;

    max_clusters = CV_MAX_CLUSTERS;
    neg_ratio = (float) nneg / npos;

    nleaves = 1 + MAX( 0, maxtreesplits );
    required_leaf_fa_rate = pow( (double) maxfalsealarm, (double) nstages ) / nleaves;

    printf( "Required leaf false alarm rate: %g\n", required_leaf_fa_rate );

    total_splits = 0;

    winsize = cvSize( winwidth, winheight );

    CV_CALL( cluster_idx = cvCreateMat( 1, npos + nneg, CV_32SC1 ) );
    CV_CALL( idx = cvCreateMat( 1, npos + nneg, CV_32SC1 ) );

    CV_CALL( tcc = (CvTreeCascadeClassifier*)
        icvLoadTreeCascadeClassifier( dirname, winwidth + 1, &total_splits ) );
    CV_CALL( leaves = icvFindDeepestLeaves( tcc ) );

    CV_CALL( icvPrintTreeCascade( tcc->root ) );

    haar_features = icvCreateIntHaarFeatures( winsize, mode, symmetric );

    printf( "Number of features used : %d\n", haar_features->count );

    training_data = icvCreateHaarTrainingData( winsize, npos + nneg );

    sprintf( stage_name, "%s/", dirname );
    suffix = stage_name + strlen( stage_name );

    if (! bg_vecfile)
      if( !icvInitBackgroundReaders( bgfilename, winsize ) && nstages > 0 )
          CV_ERROR( CV_StsError, "Unable to read negative images" );

    if( nstages > 0 )
    {
        /* width-first search in the tree */
        do
        {
            CvSplit* first_split;
            CvSplit* last_split;
            CvSplit* cur_split;

            CvTreeCascadeNode* parent;
            CvTreeCascadeNode* cur_node;
            CvTreeCascadeNode* last_node;

            first_split = last_split = cur_split = NULL;
            parent = leaves;
            leaves = NULL;
            do
            {
                int best_clusters; /* best selected number of clusters */
                float posweight, negweight;
                double leaf_fa_rate;

                if( parent ) sprintf( buf, "%d", parent->idx );
                else sprintf( buf, "NULL" );
                printf( "\nParent node: %s\n\n", buf );

                printf( "*** 1 cluster ***\n" );

                tcc->eval = icvEvalTreeCascadeClassifierFilter;
                /* find path from the root to the node <parent> */
                icvSetLeafNode( tcc, parent );

                /* load samples */
                consumed = 0;
                poscount = icvGetHaarTrainingDataFromVec( training_data, 0, npos,
                    (CvIntHaarClassifier*) tcc, vecfilename, &consumed );

                printf( "POS: %d %d %f\n", poscount, consumed, ((double) poscount)/consumed );

                if( poscount <= 0 )
                    CV_ERROR( CV_StsError, "Unable to obtain positive samples" );

                fflush( stdout );

                proctime = -TIME( 0 );

                nneg = (int) (neg_ratio * poscount);
                negcount = icvGetHaarTrainingDataFromBG( training_data, poscount, nneg,
                    (CvIntHaarClassifier*) tcc, &false_alarm, bg_vecfile ? bgfilename : NULL );
                printf( "NEG: %d %g\n", negcount, false_alarm );

                printf( "BACKGROUND PROCESSING TIME: %.2f\n", (proctime + TIME( 0 )) );

                if( negcount <= 0 )
                    CV_ERROR( CV_StsError, "Unable to obtain negative samples" );

                leaf_fa_rate = false_alarm;
                if( leaf_fa_rate <= required_leaf_fa_rate )
                {
                    printf( "Required leaf false alarm rate achieved. "
                            "Branch training terminated.\n" );
                }
                else if( nleaves == 1 && tcc->next_idx == nstages )
                {
                    printf( "Required number of stages achieved. "
                            "Branch training terminated.\n" );
                }
                else
                {
                    CvTreeCascadeNode* single_cluster;
                    CvTreeCascadeNode* multiple_clusters;
                    int single_num;

                    icvSetNumSamples( training_data, poscount + negcount );
                    posweight = (equalweights) ? 1.0F / (poscount + negcount) : (0.5F/poscount);
                    negweight = (equalweights) ? 1.0F / (poscount + negcount) : (0.5F/negcount);
                    icvSetWeightsAndClasses( training_data,
                        poscount, posweight, 1.0F, negcount, negweight, 0.0F );

                    fflush( stdout );

                    /* precalculate feature values */
                    proctime = -TIME( 0 );
                    icvPrecalculate( training_data, haar_features, numprecalculated );
                    printf( "Precalculation time: %.2f\n", (proctime + TIME( 0 )) );

                    /* train stage classifier using all positive samples */
                    CV_CALL( single_cluster = icvCreateTreeCascadeNode() );
                    fflush( stdout );

                    proctime = -TIME( 0 );
                    single_cluster->stage =
                        (CvStageHaarClassifier*) icvCreateCARTStageClassifier(
                            training_data, NULL, haar_features,
                            minhitrate, maxfalsealarm, symmetric,
                            weightfraction, numsplits, (CvBoostType) boosttype,
                            (CvStumpError) stumperror, 0 );
                    printf( "Stage training time: %.2f\n", (proctime + TIME( 0 )) );

                    single_num = icvNumSplits( single_cluster->stage );
                    best_num = single_num;
                    best_clusters = 1;
                    multiple_clusters = NULL;

                    printf( "Number of used features: %d\n", single_num );

                    if( maxtreesplits >= 0 )
                    {
                        max_clusters = MIN( max_clusters, maxtreesplits - total_splits + 1 );
                    }

                    /* try clustering */
                    vals = NULL;
                    for( k = 2; k <= max_clusters; k++ )
                    {
                        int cluster;
                        int stop_clustering;

                        printf( "*** %d clusters ***\n", k );

                        /* check whether clusters are big enough */
                        stop_clustering = ( k * minpos > poscount );
                        if( !stop_clustering )
                        {
                            int num[CV_MAX_CLUSTERS];

                            if( k == 2 )
                            {
                                proctime = -TIME( 0 );
                                CV_CALL( vals = icvGetUsedValues( training_data, 0, poscount,
                                    haar_features, single_cluster->stage ) );
                                printf( "Getting values for clustering time: %.2f\n", (proctime + TIME(0)) );
                                printf( "Value matirx size: %d x %d\n", vals->rows, vals->cols );
                                fflush( stdout );

                                cluster_idx->cols = vals->rows;
                                for( i = 0; i < negcount; i++ ) idx->data.i[i] = poscount + i;
                            }

                            proctime = -TIME( 0 );

                            CV_CALL( cvKMeans2( vals, k, cluster_idx, CV_TERM_CRITERIA() ) );

                            printf( "Clustering time: %.2f\n", (proctime + TIME( 0 )) );

                            for( cluster = 0; cluster < k; cluster++ ) num[cluster] = 0;
                            for( i = 0; i < cluster_idx->cols; i++ )
                                num[cluster_idx->data.i[i]]++;
                            for( cluster = 0; cluster < k; cluster++ )
                            {
                                if( num[cluster] < minpos )
                                {
                                    stop_clustering = 1;
                                    break;
                                }
                            }
                        }

                        if( stop_clustering )
                        {
                            printf( "Clusters are too small. Clustering aborted.\n" );
                            break;
                        }

                        cur_num = 0;
                        cur_node = last_node = NULL;
                        for( cluster = 0; (cluster < k) && (cur_num < best_num); cluster++ )
                        {
                            CvTreeCascadeNode* new_node;

                            int num_splits;
                            int last_pos;
                            int total_pos;

                            printf( "Cluster: %d\n", cluster );

                            last_pos = negcount;
                            for( i = 0; i < cluster_idx->cols; i++ )
                            {
                                if( cluster_idx->data.i[i] == cluster )
                                {
                                    idx->data.i[last_pos++] = i;
                                }
                            }
                            idx->cols = last_pos;

                            total_pos = idx->cols - negcount;
                            printf( "# pos: %d of %d. (%d%%)\n", total_pos, poscount,
                                100 * total_pos / poscount );

                            CV_CALL( new_node = icvCreateTreeCascadeNode() );
                            if( last_node ) last_node->next = new_node;
                            else cur_node = new_node;
                            last_node = new_node;

                            posweight = (equalweights)
                                ? 1.0F / (total_pos + negcount) : (0.5F / total_pos);
                            negweight = (equalweights)
                                ? 1.0F / (total_pos + negcount) : (0.5F / negcount);

                            icvSetWeightsAndClasses( training_data,
                                poscount, posweight, 1.0F, negcount, negweight, 0.0F );

                            /* CV_DEBUG_SAVE( idx ); */

                            fflush( stdout );

                            proctime = -TIME( 0 );
                            new_node->stage = (CvStageHaarClassifier*)
                                icvCreateCARTStageClassifier( training_data, idx, haar_features,
                                    minhitrate, maxfalsealarm, symmetric,
                                    weightfraction, numsplits, (CvBoostType) boosttype,
                                    (CvStumpError) stumperror, best_num - cur_num );
                            printf( "Stage training time: %.2f\n", (proctime + TIME( 0 )) );

                            if( !(new_node->stage) )
                            {
                                printf( "Stage training aborted.\n" );
                                cur_num = best_num + 1;
                            }
                            else
                            {
                                num_splits = icvNumSplits( new_node->stage );
                                cur_num += num_splits;

                                printf( "Number of used features: %d\n", num_splits );
                            }
                        } /* for each cluster */

                        if( cur_num < best_num )
                        {
                            icvReleaseTreeCascadeNodes( &multiple_clusters );
                            best_num = cur_num;
                            best_clusters = k;
                            multiple_clusters = cur_node;
                        }
                        else
                        {
                            icvReleaseTreeCascadeNodes( &cur_node );
                        }
                    } /* try different number of clusters */
                    cvReleaseMat( &vals );

                    CvSplit* curSplit;
                    CV_CALL( curSplit = (CvSplit*) cvAlloc( sizeof( *curSplit ) ) );
                    CV_ZERO_OBJ( curSplit );

                    if( last_split ) last_split->next = curSplit;
                    else first_split = curSplit;
                    last_split = curSplit;

                    curSplit->single_cluster = single_cluster;
                    curSplit->multiple_clusters = multiple_clusters;
                    curSplit->num_clusters = best_clusters;
                    curSplit->parent = parent;
                    curSplit->single_multiple_ratio = (float) single_num / best_num;
                }

                if( parent ) parent = parent->next_same_level;
            } while( parent );

            /* choose which nodes should be splitted */
            do
            {
                float max_single_multiple_ratio;

                cur_split = NULL;
                max_single_multiple_ratio = 0.0F;
                last_split = first_split;
                while( last_split )
                {
                    if( last_split->single_cluster && last_split->multiple_clusters &&
                        last_split->single_multiple_ratio > max_single_multiple_ratio )
                    {
                        max_single_multiple_ratio = last_split->single_multiple_ratio;
                        cur_split = last_split;
                    }
                    last_split = last_split->next;
                }
                if( cur_split )
                {
                    if( maxtreesplits < 0 ||
                        cur_split->num_clusters <= maxtreesplits - total_splits + 1 )
                    {
                        cur_split->single_cluster = NULL;
                        total_splits += cur_split->num_clusters - 1;
                    }
                    else
                    {
                        icvReleaseTreeCascadeNodes( &(cur_split->multiple_clusters) );
                        cur_split->multiple_clusters = NULL;
                    }
                }
            } while( cur_split );

            /* attach new nodes to the tree */
            leaves = last_node = NULL;
            last_split = first_split;
            while( last_split )
            {
                cur_node = (last_split->multiple_clusters)
                    ? last_split->multiple_clusters : last_split->single_cluster;
                parent = last_split->parent;
                if( parent ) parent->child = cur_node;

                /* connect leaves via next_same_level and save them */
                for( ; cur_node; cur_node = cur_node->next )
                {
                    FILE* file;

                    if( last_node ) last_node->next_same_level = cur_node;
                    else leaves = cur_node;
                    last_node = cur_node;
                    cur_node->parent = parent;

                    cur_node->idx = tcc->next_idx;
                    tcc->next_idx++;
                    sprintf( suffix, "%d/%s", cur_node->idx, CV_STAGE_CART_FILE_NAME );
                    file = NULL;
                    if( icvMkDir( stage_name ) && (file = fopen( stage_name, "w" )) != 0 )
                    {
                        cur_node->stage->save( (CvIntHaarClassifier*) cur_node->stage, file );
                        fprintf( file, "\n%d\n%d\n",
                            ((parent) ? parent->idx : -1),
                            ((cur_node->next) ? tcc->next_idx : -1) );
                    }
                    else
                    {
                        printf( "Failed to save classifier into %s\n", stage_name );
                    }
                    if( file ) fclose( file );
                }

                if( parent ) sprintf( buf, "%d", parent->idx );
                else sprintf( buf, "NULL" );
                printf( "\nParent node: %s\n", buf );
                printf( "Chosen number of splits: %d\n\n", (last_split->multiple_clusters)
                    ? (last_split->num_clusters - 1) : 0 );

                cur_split = last_split;
                last_split = last_split->next;
                cvFree( &cur_split );
            } /* for each split point */

            printf( "Total number of splits: %d\n", total_splits );

            if( !(tcc->root) ) tcc->root = leaves;
            CV_CALL( icvPrintTreeCascade( tcc->root ) );

        } while( leaves );

        /* save the cascade to xml file */
        {
            char xml_path[1024];
            int len = (int)strlen(dirname);
            CvHaarClassifierCascade* cascade = 0;
            strcpy( xml_path, dirname );
            if( xml_path[len-1] == '\\' || xml_path[len-1] == '/' )
                len--;
            strcpy( xml_path + len, ".xml" );
            cascade = cvLoadHaarClassifierCascade( dirname, cvSize(winwidth,winheight) );
            if( cascade )
                cvSave( xml_path, cascade );
            cvReleaseHaarClassifierCascade( &cascade );
        }

    } /* if( nstages > 0 ) */

    /* check cascade performance */
    printf( "\nCascade performance\n" );

    tcc->eval = icvEvalTreeCascadeClassifier;

    /* load samples */
    consumed = 0;
    poscount = icvGetHaarTrainingDataFromVec( training_data, 0, npos,
        (CvIntHaarClassifier*) tcc, vecfilename, &consumed );

    printf( "POS: %d %d %f\n", poscount, consumed,
        (consumed > 0) ? (((float) poscount)/consumed) : 0 );

    if( poscount <= 0 )
        fprintf( stderr, "Warning: unable to obtain positive samples\n" );

    proctime = -TIME( 0 );

    negcount = icvGetHaarTrainingDataFromBG( training_data, poscount, nneg,
        (CvIntHaarClassifier*) tcc, &false_alarm, bg_vecfile ? bgfilename : NULL );

    printf( "NEG: %d %g\n", negcount, false_alarm );

    printf( "BACKGROUND PROCESSING TIME: %.2f\n", (proctime + TIME( 0 )) );

    if( negcount <= 0 )
        fprintf( stderr, "Warning: unable to obtain negative samples\n" );

    __END__;

    if (! bg_vecfile)
      icvDestroyBackgroundReaders();

    if( tcc ) tcc->release( (CvIntHaarClassifier**) &tcc );
    icvReleaseIntHaarFeatures( &haar_features );
    icvReleaseHaarTrainingData( &training_data );
    cvReleaseMat( &cluster_idx );
    cvReleaseMat( &idx );
    cvReleaseMat( &vals );
    cvReleaseMat( &features_idx );
}


版权声明:本文为博主原创文章,未经博主允许不得转载。

opencv源码分析:cvCreateTreeCascadeClassifier

标签:cvcreatetreecascadec   级联分类器   adaboost   opencv源码分析   cvhaartraining.cpp   

原文地址:http://blog.csdn.net/ding977921830/article/details/46970197

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!