Changeset 16368


Ignore:
Timestamp:
Jul 22, 2016, 12:01:05 PM (3 years ago)
Author:
dferreira
Message:

Changes to the language extractor tests and to the Excel file with the results.

Location:
internals/2016/aptoideimagesdetector/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • internals/2016/aptoideimagesdetector/trunk/Source Code/Language Extractor/language_extractor.py

    r16366 r16368  
    174 174 #print testing_set
    175 175 print str(datetime.now()-now)
     176# Step 10: With the original Naive Bayes, print Classification. Try with others classifiers
    176 177
    177 178 print "Training..."
    178 # Step 10: With the original Naive Bayes, print Classification. Try with others classifiers
    179 classifier = nltk.NaiveBayesClassifier.train(training_set)
    180 refsets = collections.defaultdict(set)
    181 testsets = collections.defaultdict(set)
    182 
    183 for i, (features, label) in enumerate(testing_set):
    184         refsets[label].add(i)
    185         observed = classifier.classify(features)
    186         testsets[observed].add(i)
    187 
    188 print 'Explicit Precision: ', precision(refsets['exp'], testsets['exp'])
    189 print 'Explicit recall: ', recall(refsets['exp'], testsets['exp'])
    190 print 'Explicit F-Score: ', f_measure(refsets['exp'], testsets['exp'])
    191 print 'Non-Explicit Precision: ', precision(refsets['non'], testsets['non'])
    192 print 'Non-Explicit Recall: ', recall(refsets['non'], testsets['non'])
    193 print 'Non-Explicit F-Score: ', f_measure(refsets['non'], testsets['non'])
    194 
    195 print "Original Naive Bayes Algo Accuracy percent: ", (nltk.classify.accuracy(classifier, testing_set))*100
    196 
    197 classifier.show_most_informative_features(n_most_informative_features)
    198 
    199 RFC_classifier = SklearnClassifier(RandomForestClassifier())
    200 RFC_classifier.train(training_set)
    201 print "RFC_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(RFC_classifier, testing_set))*100
    202 
    203 ADA_classifier = SklearnClassifier(AdaBoostClassifier())
    204 ADA_classifier.train(training_set)
    205 print "ADA_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(ADA_classifier, testing_set))*100
    206 
    207 MNB_classifier = SklearnClassifier(MultinomialNB())
    208 MNB_classifier.train(training_set)
    209 print "MNB_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(MNB_classifier, testing_set))*100
    210 
    211 TREE_classifier = SklearnClassifier(tree.DecisionTreeClassifier())
    212 TREE_classifier.train(training_set)
    213 print "TREE_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(TREE_classifier, testing_set))*100
    214 
    215 BNB_classifier = SklearnClassifier(BernoulliNB())
    216 BNB_classifier.train(training_set)
    217 print "BNB_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(BNB_classifier, testing_set))*100
    218 
    219 LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    220 LogisticRegression_classifier.train(training_set)
    221 print "LogisticRegression_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100
    222 
    223 SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    224 SGDClassifier_classifier.train(training_set)
    225 print "SGDClassifier_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100
    226 
    227 SVCClassifier_classifier = SklearnClassifier(SVC())
    228 SVCClassifier_classifier.train(training_set)
    229 print "SVCClassifier_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(SVCClassifier_classifier, testing_set))*100
    230 
    231 LinearSVCClassifier_classifier = SklearnClassifier(LinearSVC())
    232 LinearSVCClassifier_classifier.train(training_set)
    233 print "LinearSVCClassifier_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(LinearSVCClassifier_classifier, testing_set))*100
    234 
    235 #NuSVCClassifier_classifier = SklearnClassifier(NuSVC())
    236 #NuSVCClassifier_classifier.train(training_set)
    237 #print "LinearSVCClassifier_classifier Algo Accuracy percent: ", (nltk.classify.accuracy(NuSVCClassifier_classifier, testing_set))*100
     179
     180def results(classifier, testing_set, training_set):
     181        now = datetime.now()
     182        classifier = classifier.train(training_set)
     183        refsets = collections.defaultdict(set)
     184        testsets = collections.defaultdict(set)
     185
     186        for i, (features, label) in enumerate(testing_set):
     187                refsets[label].add(i)
     188                observed = classifier.classify(features)
     189                testsets[observed].add(i)
     190
     191        print "Time training: "+ str(datetime.now()-now)
     192        print 'Explicit Precision: ', precision(refsets['exp'], testsets['exp'])
     193        print 'Explicit recall: ', recall(refsets['exp'], testsets['exp'])
     194        print 'Explicit F-Score: ', f_measure(refsets['exp'], testsets['exp'])
     195        print 'Non-Explicit Precision: ', precision(refsets['non'], testsets['non'])
     196        print 'Non-Explicit Recall: ', recall(refsets['non'], testsets['non'])
     197        print 'Non-Explicit F-Score: ', f_measure(refsets['non'], testsets['non'])
     198
     199        print "Accuracy percent: ", (nltk.classify.accuracy(classifier, testing_set))*100
     200
     201print "****** NAIVE BAYES ************"
     202results(nltk.NaiveBayesClassifier, testing_set, training_set)
     203print "****** Random Forest ************"
     204results(SklearnClassifier(RandomForestClassifier()), testing_set, training_set)
     205print "****** ADA BOOST ************"
     206results(SklearnClassifier(AdaBoostClassifier()), testing_set, training_set)
     207print "****** MULTINOMIAL ************"
     208results(SklearnClassifier(MultinomialNB()), testing_set, training_set)
     209print "****** DECISION TREE ************"
     210results(SklearnClassifier(tree.DecisionTreeClassifier()), testing_set, training_set)
     211print "****** BERNOULLI ************"
     212results(SklearnClassifier(BernoulliNB()), testing_set, training_set)
     213print "****** LOGISTIC REGRESSION ************"
     214results(SklearnClassifier(LogisticRegression()), testing_set, training_set)
     215print "****** SGD CLASSIFIER ************"
     216results(SklearnClassifier(SGDClassifier()), testing_set, training_set)
     217print "****** SVC ************"
     218results(SklearnClassifier(SVC()), testing_set, training_set)
     219print "****** LINEAR SVC ************"
     220results(SklearnClassifier(LinearSVC()), testing_set, training_set)
     221
     222#classifier.show_most_informative_features(n_most_informative_features)
     223'''
    238 224
    239 225 voted_classifier = VoteClassifier(classifier,
     
    262 248 # Step 12: Research about classifier parameters and decide which is better.
    263 249
    264 # Step 13: Save classifier with pickle
    265 
     250# Step 13: Save classifier with pickle'''
     251
Note: See TracChangeset for help on using the changeset viewer.