Changeset 16372


Ignore:
Timestamp:
Jul 22, 2016, 1:56:49 PM (3 years ago)
Author:
dferreira
Message:

Results updated

Location:
internals/2016/aptoideimagesdetector/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • internals/2016/aptoideimagesdetector/trunk/Source Code/Language Extractor/language_extractor.py

    r16368 r16372  
    7878        exp_size+=1
    7979
    80 c2.execute(''' SELECT description FROM app_data WHERE majority=0 ''')
    81 
     80c.execute(''' SELECT description FROM app_data WHERE majority=1''')
     81
     82for d in c.fetchall():
     83        explicit_content.append(d[0])
     84        documents.append((d[0],'exp'))
     85        exp_size += 1
     86
     87
     88c.execute(''' SELECT description FROM app_data WHERE majority=0''')
     89
     90for d in c.fetchall():
     91        non_explicit_content.append(d[0])
     92        documents.append((d[0],'non'))
     93        non_size += 1
     94
     95        # Non_size==exp_size
     96        if non_size==exp_size:
     97                break
     98
     99#c2.execute(''' SELECT description FROM app_data WHERE majority=0 ''')
     100'''
    82101for d in c2.fetchall():
    83102        non_explicit_content.append(d[0])
    84103        documents.append((d[0],'non'))
    85104        non_size += 1
    86 
    87 c.execute(''' SELECT description FROM app_data WHERE majority=1''')
    88 
    89 for d in c.fetchall():
    90         explicit_content.append(d[0])
    91         documents.append((d[0],'exp'))
    92         exp_size += 1
    93 
    94 c.execute(''' SELECT description FROM app_data WHERE majority=0''')
    95 
    96 for d in c.fetchall():
    97         non_explicit_content.append(d[0])
    98         documents.append((d[0],'non'))
    99         non_size += 1
     105'''
    100106       
    101107print "Explicit descriptions: "+str(exp_size)
     
    174180#print testing_set
    175181print str(datetime.now()-now)
     182
    176183# Step 10: With the original Naive Bayes, print Classification. Try with other classifiers
    177184
     
    199206        print "Accuracy percent: ", (nltk.classify.accuracy(classifier, testing_set))*100
    200207
    201 print "****** NAIVE BAYES ************"
     208print "\n****** NAIVE BAYES ************"
    202209results(nltk.NaiveBayesClassifier, testing_set, training_set)
    203 print "****** Random Forest ************"
     210print "\n****** Random Forest ************"
    204211results(SklearnClassifier(RandomForestClassifier()), testing_set, training_set)
    205 print "****** ADA BOOST ************"
     212print "\n****** ADA BOOST ************"
    206213results(SklearnClassifier(AdaBoostClassifier()), testing_set, training_set)
    207 print "****** MULTINOMIAL ************"
     214print "\n****** MULTINOMIAL ************"
    208215results(SklearnClassifier(MultinomialNB()), testing_set, training_set)
    209 print "****** DECISION TREE ************"
     216print "\n****** DECISION TREE ************"
    210217results(SklearnClassifier(tree.DecisionTreeClassifier()), testing_set, training_set)
    211 print "****** BERNOULLI ************"
     218print "\n****** BERNOULLI ************"
    212219results(SklearnClassifier(BernoulliNB()), testing_set, training_set)
    213 print "****** LOGISTIC REGRESSION ************"
     220print "\n****** LOGISTIC REGRESSION ************"
    214221results(SklearnClassifier(LogisticRegression()), testing_set, training_set)
    215 print "****** SGD CLASSIFIER ************"
     222print "\n****** SGD CLASSIFIER ************"
    216223results(SklearnClassifier(SGDClassifier()), testing_set, training_set)
    217 print "****** SVC ************"
     224print "\n****** SVC ************"
    218225results(SklearnClassifier(SVC()), testing_set, training_set)
    219 print "****** LINEAR SVC ************"
     226print "\n****** LINEAR SVC ************"
    220227results(SklearnClassifier(LinearSVC()), testing_set, training_set)
    221228
     
    223230'''
    224231
    225 voted_classifier = VoteClassifier(classifier,
    226         MNB_classifier,
    227         ADA_classifier,
    228         TREE_classifier,
    229         RFC_classifier,
    230         BNB_classifier,
    231         LogisticRegression_classifier,
    232         SGDClassifier_classifier,
    233         SVCClassifier_classifier,
    234 #       NuSVCClassifier_classifier,
    235         LinearSVCClassifier_classifier)
    236 print "Voted_classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, testing_set))*100
    237 
    238 print "Classification:", voted_classifier.classify(testing_set[0][0])*100
    239 print "Classification:", voted_classifier.classify(testing_set[1][0])*100
    240 print "Classification:", voted_classifier.classify(testing_set[2][0])*100
    241 print "Classification:", voted_classifier.classify(testing_set[3][0])*100
    242 print "Classification:", voted_classifier.classify(testing_set[4][0])*100
    243 print "Classification:", voted_classifier.classify(testing_set[5][0])*100
    244 
    245 
    246 # Step 11: Create Classifier class and try to decide which of the classifiers is more accurate.
     232# Step 11: Decide which of the classifiers is more accurate.
    247233
    248234# Step 12: Research classifier parameters and decide which work best.
Note: See TracChangeset for help on using the changeset viewer.