Changeset 16527


Ignore:
Timestamp:
Aug 12, 2016, 9:25:58 AM (3 years ago)
Author:
dferreira
Message:

Check cache before sending asynchronous message

Location:
internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector/aptoide_mature_app_detector/explicit_content_detector/API/Explicit_detector/analyse_app.py

    r16525 r16527  
    44
    55# Main function to analyse the percentage of explicitness of an app
     6# Caches the result on a database
    67
    78from __future__ import division
     
    3940def analyse_app(app_id, app_md5, illust2vec, icons, screenshots, description, category, age, size, title, cache_reload):
    4041
     42        # Loads configurations from file
    4143        p = os.path.abspath(os.path.join("../", os.pardir))
    4244        tb_id = 0
     
    8284        screens = []
    8385        for icon in icons:
     86
    8487                # Check if has already been analysed.
    85                 # If it has, just append the previous result.
    86                 # If not, save the new result
    8788                c.execute(''' SELECT image_exp, image_safe FROM image_results WHERE url=?''',(icon,))
    8889                res = c.fetchone()
     90               
    8991                # If is not needed to reload the cache, tries to return the result saved
    9092                if cache_reload==0:
    9193                        if res:
    9294                                icon_l.append((('explicit',res[0]), ('safe',res[1])))
     95                        # If there were no previous results, inserts new results
    9396                        else:
    9497                                res = analyse_explicit(illust2vec, icon)
     
    118121        for scr in screenshots:
    119122                # Check if has already been analysed.
    120                 # If it has, just append the previous result.
    121                 # If not, save the new result
    122123                c.execute(''' SELECT image_exp, image_safe FROM image_results WHERE url=?''',(scr,))
    123124                res = c.fetchone()
     
    126127                        if res:
    127128                                screens.append((('explicit',res[0]), ('safe',res[1])))
     129                        # If there were no previous results, inserts new results
    128130                        else:
    129131                                res = analyse_explicit(illust2vec, scr)
  • internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector/aptoide_mature_app_detector/explicit_content_detector/API/views.py

    r16526 r16527  
    1010
    1111# If you don't have access to local images, change the config.json "local_or_web_images" to "web" instead of "local"
     12
    1213from threading import Thread
    1314import os
     
    2122import urllib2
    2223import json
     24import sqlite3
    2325
    2426# Save the Illustration2Vec model on memory
     
    2830        _local = True
    2931
    30 
     32# Decorator to use for multi-threading
    3133def postpone(function):
    3234  def decorator(*args, **kwargs):
     
    3638  return decorator
    3739
    38 
     40# Gets the local url of an image
    3941def local_url(parent_dir, image_path):
    4042        try:
     
    4648        return final_path
    4749
    48 # Function that analyses the information of an app and returns if it is explicit or not
     50# Asynchronous function that analyses the information of an app and returns if it is explicit or not
    4951@postpone
    50 def get_data_async(page, url_async, cache_reload):
     52def get_data_async(page, url_async, cache_reload, md5_or_id):
    5153        illust2vec = Model._model
    5254        exp = False
    5355
     56        # Loads the configuration file
    5457        p = os.path.abspath(os.path.join("../", os.pardir))
    5558
     
    5760                config = json.load(json_data)
    5861
     62        # If web is set, the script will try to parse the images from the web services
    5963        if config['directories']['local_or_web_images']=='web':
    6064                        Model._local = False
    6165
    62 
    6366        print "Fetching data..."
    6467        title = page['meta']['title']
    6568
     69        app_id = page['apk']['id']
     70
     71        md5 = page['apk']['md5sum']
     72
    6673        description = page['meta']['description']
    6774
     
    7683        scr = []
    7784        scr_hd = []
    78         # Relative path to our images. To use on outer machine, uncomment the following code and comment the next line after that code.
     85       
    7986        if 'sshots' in page['media']:
    8087                for s in page['media']['sshots']:
     
    126133                if dist>0.5:
    127134                        exp = True
    128         try:
    129                 print "Opening "+str(url_async)+str(exp)
    130                 return urllib2.urlopen(str(url_async)+str(exp))
     135        # Tries to open url_async followed by the app identifier (md5sum or id) and the mature flag to signal the end of the script
     136        try:
     137                if md5_or_id=="md5":
     138                        print "Opening "+str(url_async)+"md5sum="+md5+"/mature="+str(exp)
     139                        return urllib2.urlopen(str(url_async)+"md5sum="+md5+"/mature="+str(exp))
     140                else:
     141                        print "Opening "+str(url_async)+"id="+str(app_id)+"/mature="+str(exp)
     142                        return urllib2.urlopen(str(url_async)+"id="+str(app_id)+"/mature="+str(exp))
    131143        except:
    132144                print "Could not open the webpage"
    133145
    134 # Function that analyses the information of an app and returns if it is explicit or not
     146# Synchronous function that analyses the information of an app and returns if it is explicit or not
    135147def get_data_sync(page, cache_reload):
    136148        illust2vec = Model._model
    137149        exp = False
    138150
     151        # Loads the configuration file
    139152        p = os.path.abspath(os.path.join("../", os.pardir))
    140153
     
    142155                config = json.load(json_data)
    143156
     157        # If web is set, the script will try to parse the images from the web services
    144158        if config['directories']['local_or_web_images']=='web':
    145159                        Model._local = False
     
    149163        title = page['meta']['title']
    150164
     165        app_id = page['apk']['id']
     166
    151167        description = page['meta']['description']
    152168
    153169        categories = []
     170
    154171
    155172        for cat in page['meta']['categories']['standard']:
     
    161178        scr = []
    162179        scr_hd = []
    163         # Relative path to our images. To use on outer machine, uncomment the following code and comment the next line after that code.
     180       
    164181        if 'sshots' in page['media']:
    165182                for s in page['media']['sshots']:
     
    220237
    221238def getbyId(request, app_id, cache_reload=0):
     239
     240        # If cache reload is 1, it will force the content to be rewritten in the cache
    222241        if not cache_reload:
    223242                cache_reload=0
    224243        now = datetime.now()
    225244        url = "http://webservices.aptoide.com/webservices/3/getApkInfo/id:"
     245       
     246        # Loads the configurations from a config file
    226247        p = os.path.abspath(os.path.join("../", os.pardir))
    227248        with open(p+"/config.json") as json_data:
     
    232253                webpage = urllib2.urlopen(url+str(app_id)+"/json")
    233254                soup = BeautifulSoup(webpage, "lxml")
    234                                                        
     255                                                               
    235256                text = soup.get_text()
    236257                page = json.loads(text)
    237258                if page['status']!='FAIL':
     259                        app_md5 = page['apk']['md5sum']
     260                        # Check if is asynchronous or synchronous
    238261                        if config["synchronous_or_asynchronous"]=="asynchronous":
    239                                 get_data_async(page, config["directories"]["asynchronous_dir"], cache_reload)
    240                                 return HttpResponse("Waiting")
     262                                # If cache_reload = 0, check cache
     263                                if cache_reload==0:
     264                                        db = sqlite3.connect(config['directories']['final_database'])
     265                                        c = db.cursor()
     266                                        c.execute(''' SELECT ID FROM app WHERE app_md5=? ''',(app_md5,))
     267                                        res = c.fetchone()
     268                                        if res:
     269                                                # If we have the result, return it
     270                                                c.execute(''' SELECT is_mature FROM final_results WHERE for_id=? ''',(res[0],))
     271                                                res2 = c.fetchone()
     272                                                if res2:
     273                                                        db.close()
     274                                                        res=""
     275                                                        if res2[0]>0.5:
     276                                                                res="yes"
     277                                                        else:
     278                                                                res="no"
     279                                                        return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': res, 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
     280
     281                                get_data_async(page, config["directories"]["asynchronous_dir"], cache_reload, "id")
     282                                return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'request_submitted','mature_content': '', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    241283                        exp = get_data_sync(page, int(cache_reload))
    242284
     
    244286                        status = 'Failed'
    245287                        print "App does not exist"
    246                         return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True))
     288                        return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    247289        except:
    248290                status = 'Failed'
    249291                print "Error during parsing"
    250                 return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True))
     292                return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    251293
    252294        # If it's explicit content, redirects to true page. Otherwise, redirects to false page.
    253295        if exp:
    254                 return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': 'yes', 'time':str(datetime.now()-now)}, sort_keys=True))
    255         return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': 'no', 'time':str(datetime.now()-now)}, sort_keys=True))
     296                return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': 'yes', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
     297        return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': 'no', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    256298
    257299
     
    260302# For more detailed error logs, comment out the try... except and indent its content correctly
    261303def getbyMD5(request, app_md5, cache_reload=0):
     304       
     305        # If cache reload is 1, it will force the content to be rewritten in the cache
    262306        if not cache_reload:
    263307                cache_reload=0
    264308        now = datetime.now()
    265309        url = "http://webservices.aptoide.com/webservices/3/getApkInfo/md5sum:"
     310        # Loads the configurations from a config file
    266311        p = os.path.abspath(os.path.join("../", os.pardir))
    267312        with open(p+"/config.json") as json_data:
     
    279324                if page['status']!='FAIL':
    280325                        app_id = page['apk']['id']
     326                        # Check if is asynchronous or synchronous
    281327                        if config["synchronous_or_asynchronous"]=="asynchronous":
    282                                 get_data_async(page, config["directories"]["asynchronous_dir"], cache_reload)
    283                                 return HttpResponse("Waiting")
     328                                # If cache_reload = 0, check cache
     329                                if cache_reload==0:
     330                                        db = sqlite3.connect(config['directories']['final_database'])
     331                                        c = db.cursor()
     332                                        c.execute(''' SELECT ID FROM app WHERE app_md5=? ''',(app_md5,))
     333                                        res = c.fetchone()
     334                                        if res:
     335                                                # If we have the result, return it
     336                                                c.execute(''' SELECT is_mature FROM final_results WHERE for_id=? ''',(res[0],))
     337                                                res2 = c.fetchone()
     338                                                if res2:
     339                                                        db.close()
     340                                                        res=""
     341                                                        if res2[0]>0.5:
     342                                                                res="yes"
     343                                                        else:
     344                                                                res="no"
     345                                                        return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'OK','mature_content': res, 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
     346
     347                                get_data_async(page, config["directories"]["asynchronous_dir"], cache_reload, "md5")
     348                                return HttpResponse(json.dumps({'app_id': int(app_id), 'status': 'request_submitted','mature_content': '', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    284349                        exp = get_data_sync(page, int(cache_reload))
    285350
     
    287352                        status = 'Failed'
    288353                        print "App does not exist"
    289                         return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True))
     354                        return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    290355        except:
    291356                status = 'Failed'
    292357                print "Error during parsing"
    293                 return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True))
     358                return HttpResponse(json.dumps({'status': 'Failed', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
    294359
    295360        # If it's explicit content, redirects to true page. Otherwise, redirects to false page.
    296361        if exp:
    297                 return HttpResponse(json.dumps({'app_id': app_id, 'status': 'OK','mature_content': 'yes', 'time':str(datetime.now()-now)}, sort_keys=True))
    298         return HttpResponse(json.dumps({'app_id': app_id, 'status': 'OK','mature_content': 'no', 'time':str(datetime.now()-now)}, sort_keys=True))
     362                return HttpResponse(json.dumps({'app_id': app_id, 'status': 'OK','mature_content': 'yes', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
     363        return HttpResponse(json.dumps({'app_id': app_id, 'status': 'OK','mature_content': 'no', 'time':str(datetime.now()-now)}, sort_keys=True), content_type='application/json')
  • internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector/config.json

    r16524 r16527  
    11{
    2         "synchronous_or_asynchronous": "synchronous",
     2        "synchronous_or_asynchronous": "asynchronous",
    33        "directories": {
    44                "_Comments":"paths should always be relative to django project (where you run manage.py runserver)",
     
    1111                "text_cat_model": "API/Explicit_detector/model_apps_info.pickle",
    1212                "final_database": "API/Explicit_detector/Final_results.db",
    13                 "asynchronous_dir": "google.pt/"
     13                "asynchronous_dir": "http://google.pt/"
    1414        }
    1515       
  • internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector/readme.rst

    r16487 r16527  
    2222
    2323That's it!
     24
     25---------------------------
     26Optional:
     27
     28If you don't have access to local images, change the config.json "local_or_web_images" to "web" instead of "local"
     29
     30In the config file, you can also switch to asynchronous requests. In that case, you also need to specify the asynchronous_dir, which is accessed at the end of the request.
     31
     32If you want to force the cache to analyse the app again, just add /reload=1 to the end of the url. For example, 127.0.0.1:8000/detect_mature/id=app_id/reload=1
  • internals/2016/aptoideimagesdetector/trunk/aptoide_mature_app_detector/requirements.txt

    r16516 r16527  
    11Aptoide Mature App Detector
    22
    3 Requirements:
     3Requirements (on Ubuntu 16.04):
    44-> Python 2.x
    55-> Django
     
    88-> Scipy
    99-> Pillow
    10 -> Chainer
     10-> Chainer==1.11.0
    1111-> NLTK
    1212-> MatPlotLib
    1313-> Scikit-Image
    1414-> Scikit-Learn
     15-> Libatlas-base-dev
    1516
    1617It is also needed to download libraries from NLTK, after pip install.
     
    2021
    2122If you don't have access to local images, change the config.json "local_or_web_images" to "web" instead of "local"
     23
     24In the config file, you can also switch to asynchronous requests. In that case, you also need to specify the asynchronous_dir, which is accessed at the end of the request.
Note: See TracChangeset for help on using the changeset viewer.