Changeset 16409


Ignore:
Timestamp:
Jul 26, 2016, 4:00:37 PM (3 years ago)
Author:
dferreira
Message:

Tests and final changes to Text_Categorization-tests done

Location:
internals/2016/aptoideimagesdetector/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • internals/2016/aptoideimagesdetector/trunk/Source Code/Text_categorization/Text_categorization-tests.py

    r16406 r16409  
    3535
    3636# Length of test set
     37# Not used in k-fold cross validation
    3738#number_testing = 500
     39
    3840# Number of most common words used for classifier
    3941# With NuSVC:
     
    4345# 7000 Words Accuracy: 96.8
    4446n_common_words = 6000
     47
    4548# Number of most informative features
    4649#n_most_informative_features = 25
     
    172175
    173176# Step 8: Shuffle feature sets
     177# Not used in k-fold cross validation
    174178#random.shuffle(featuresets)
    175179
     180
    176181# Step 9: Create training set and testing set from feature sets
     182# Regular tests
    177183#training_set = featuresets[:exp_size+non_size-number_testing]
    178184#print training_set
     
    181187#print str(datetime.now()-now)
    182188
    183 explicit_feat = [feature for feature in featuresets if feature[4]=='exp']
    184 non_explicit_feat = [feature for feature in featuresets if feature[4]=='non']
    185 
     189# Cross validation tests
     190explicit_feat = [feature for feature in featuresets if feature[1]=='exp']
     191non_explicit_feat = [feature for feature in featuresets if feature[1]=='non']
     192
     193# The while loop below is only needed for k-fold cross validation tests
    186194i=0
     195# K Value for k-fold cross validation tests
     196k=5
     197# Nu Value
     198n=0.1
    187199while i<5:
    188         # Step 10: With the original Naive Bayes, print Classification. Try with others classifiers
    189 
     200        # Step 10: With different classifiers, print Classification and other metrics.
    190201        print "Training..."
    191202
    192         testing_set = explicit_feat[(i*len(explicit_feat)/5):((i+1)*len(explicit_feat)/5)]+non_explicit_feat[(i*len(non_explicit_feat)/5):((i+1)*len(non_explicit_feat)/5)]
    193         print "Testing: "+str(len(testing_set))
    194         training_set = [x for j,x in enumerate(explicit_feat) if j<(i*len(explicit_feat)/5) or j>((i+1)*len(explicit_feat)/5)]
    195         training_set += [x for j,x in enumerate(non_explicit_feat) if j<(i*len(non_explicit_feat)/5) or j>((i+1)*len(non_explicit_feat)/5)]
    196         print "Training: "+str(len(training_set))
    197 
     203        # Only needed for k-fold cross validation
     204        testing_set = explicit_feat[int((i*len(explicit_feat)/k)):int(((i+1)*len(explicit_feat)/k))]+non_explicit_feat[int((i*len(non_explicit_feat)/k)):int(((i+1)*len(non_explicit_feat)/k))]
     205        #print "Testing: "+str(len(testing_set))
     206        training_set = [x for j,x in enumerate(explicit_feat) if j<(i*len(explicit_feat)/k) or j>((i+1)*len(explicit_feat)/k)]
     207        training_set += [x for j,x in enumerate(non_explicit_feat) if j<(i*len(non_explicit_feat)/k) or j>((i+1)*len(non_explicit_feat)/k)]
     208        #print "Training: "+str(len(training_set))
    198209        random.shuffle(training_set)
    199210        random.shuffle(testing_set)
     
    242253        try:
    243254                print "\n****** NU SVC ************"
    244                 saving_model=results(SklearnClassifier(NuSVC(nu=0.1)), testing_set, training_set)
     255                saving_model=results(SklearnClassifier(NuSVC(nu=n, kernel='rbf')), testing_set, training_set)
    245256        except:
    246257                pass
Note: See TracChangeset for help on using the changeset viewer.