source: internals/2016/aptoideimagesdetector/trunk/Source Code/Linguage Extractor/Initial language extractor/get_aptoide_app_info.py @ 16341

Last change on this file since 16341 was 16341, checked in by dferreira, 3 years ago

Start of script that get id's from aptoide apps.

File size: 1.1 KB
Line 
1from bs4 import BeautifulSoup
2import urllib2
3import json
4import sys
5
6# Database: ID|TITLE|DESCRIPTION|MAJORITY|SCREENSHOT|IMAGES
7
8# 1 App
9# From a list of ids, access each app and save to a database: title, screenshot URL, image URLs, description and majority content
10# Save the screenshot and image URLs to a file
11# Mark as visited
12
13# 2 App
14# Receives from input a query and a number
15# Returns a list with all the id's for that query
16
17#As for our specific use case scenarios:
18#- Search: http://ws2.aptoide.com/api/7/listSearchApps/query=<SEARCH_QUERY>
19
20#- App: http://ws2.aptoide.com/api/7/getAppMeta/app_id=<APP_ID>
21
def get_list_id(query):
    """Search the Aptoide web service and collect the ids of matching apps.

    Each id is printed to stdout as it is found and the full list is
    returned to the caller.

    Args:
        query: Search text. It is percent-encoded before being placed in
            the request URL, so it may contain spaces or reserved
            characters.

    Returns:
        A list with the ``id`` of every entry under ``datalist -> list`` in
        the response. The list is empty when the request fails.

    Raises:
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload lacks ``datalist``/``list``/``id``.
    """
    # Local import keeps this block self-contained; in Python 2 quote()
    # lives in urllib, not urllib2.
    import urllib

    url = "http://ws2.aptoide.com/api/7/listSearchApps/query="
    all_ids = []

    try:
        # safe="" so that a "/" inside the query cannot change the URL path.
        webpage = urllib2.urlopen(url + urllib.quote(query, safe=""))
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so this covers both HTTP
        # error statuses and connection/DNS failures.
        print("Error while fetching from database.")
        return all_ids

    try:
        # The endpoint answers with JSON, so decode the body directly rather
        # than routing it through an HTML parser.
        page = json.loads(webpage.read())
    finally:
        webpage.close()

    for app in page['datalist']['list']:
        all_ids.append(app['id'])
        print(app['id'])

    return all_ids
41
if __name__ == "__main__":
    # Script entry point: run a search only when a query was supplied as the
    # first command-line argument; otherwise do nothing.
    cli_args = sys.argv[1:]
    if cli_args:
        get_list_id(cli_args[0])
Note: See TracBrowser for help on using the repository browser.