#!/usr/bin/env python3
from datetime import date, datetime
from pony.orm import *
# Shared Pony ORM database object. The App entity below is declared against
# it, and it is bound to a SQLite file (or an in-memory DB) further down.
db = Database()
class App(db.Entity):
    """A crawled Play Store page.

    Rows are keyed by ``id``, which the crawler fills with either a bare
    package id or a full ``https://`` URL.
    """

    # Primary key: package id or page URL (up to 800 characters).
    id = PrimaryKey(str, 800)

    # Descriptive fields; nothing in the code visible here populates them.
    name = Optional(str, 100)
    author = Optional(str, 100)
    cate = Optional(str, 100)
    scored = Optional(str, 50)

    # Download count, stored as text by the report script.
    installed = Optional(str, 50)

    # When the page was fetched; empty until the crawler has downloaded it.
    dl = Optional(datetime)

    # Row creation time, filled in by SQLite itself using local time.
    ts = Optional(datetime, sql_default="(datetime('now', 'localtime'))")
if __name__ == '__main__':
    # Run directly: smoke-test the schema against a throwaway in-memory DB
    # and show what the SQL-side default for ``ts`` comes back as.
    db.bind('sqlite', filename=':memory:')
    db.generate_mapping(create_tables=True)
    with db_session:
        App(id='a')
    with db_session:
        stamp = App['a'].ts
        print(stamp, type(stamp))
else:
    # Imported as a module: bind to the on-disk database file 'db'.
    @db.on_connect(provider="sqlite")
    def _home_sqliterc(_, conn):
        """Run the user's ~/.sqliterc (if present) on each new connection."""
        import pathlib

        rc_file = pathlib.Path.home() / ".sqliterc"
        if rc_file.exists():
            conn.executescript(rc_file.read_text())

    db.bind('sqlite', filename='db')
    db.generate_mapping(create_tables=True)
#!/usr/bin/env python3
from datetime import date, datetime
from collections import deque
from requests_html import HTMLSession
# One HTTP session object, reused for every page fetch.
session = HTMLSession()
from entities import App, db_session, select
# FIFO work queue of package ids / URLs that still have to be fetched.
q = deque()
def init_app(id):
    """Make sure an App row exists for ``id``.

    Returns ``True`` when a new row had to be created and ``None`` when the
    row was already there, so callers can use the result to decide whether
    the id is new and should be queued.
    """
    with db_session:
        if App.get(id=id):
            # Already known: nothing to create, report "not new".
            return None
        App(id=id)
        return True
def _app_key(link):
    """Map a Play Store link to the key used for App rows and the queue.

    A details link is reduced to the value of its ``id`` query parameter, so
    extra parameters (``&hl=en`` and the like) neither create duplicate rows
    nor end up in the ``apps/<id>`` file name. Every other link, including a
    details link whose id does not look like a package name, is keyed by its
    full URL.
    """
    import re
    from urllib.parse import parse_qs, urlsplit

    if link.startswith("https://play.google.com/store/apps/details"):
        ids = parse_qs(urlsplit(link).query).get("id")
        # The id later becomes a file name, so accept only dotted identifiers
        # (no slashes, no '..'). The last occurrence wins, as it did with the
        # earlier rpartition("id=") logic.
        if ids and re.fullmatch(r"[A-Za-z0-9_]+(\.[A-Za-z0-9_]+)*", ids[-1]):
            return ids[-1]
    return link


def fetch(id):
    """Download one page, record it, and queue the app links found on it.

    ``id`` is either a full ``https://`` URL or a bare package id; a bare id
    is fetched from the Play Store details page and its raw HTML is saved to
    ``apps/<id>``. Pages whose App row already has ``dl`` set are skipped.
    Every linked page under ``https://play.google.com/store/apps/`` that is
    not yet in the database is added to the global queue ``q``.
    """
    import os

    is_url = id.startswith("https://")
    if is_url:
        url = id
    else:
        url = f"https://play.google.com/store/apps/details?id={id}"

    init_app(id)
    with db_session:
        app = App[id]
        if app.dl:
            return
        r = session.get(url)
        if not is_url:
            os.makedirs("apps", exist_ok=True)
            with open(f'apps/{id}', 'wb') as f:
                f.write(r.html.raw_html)
        # Stamp the row last: if the request or the file write raises, the
        # session is rolled back and the page is not marked as downloaded.
        app.dl = datetime.now()

    # Separate loop names so the ``id`` argument is not clobbered.
    for href in r.html.absolute_links:
        if not href.startswith("https://play.google.com/store/apps/"):
            continue
        key = _app_key(href)
        if init_app(key):
            q.append(key)
def main():
    """Crawl until the work queue is empty.

    The queue is seeded with every App row that has not been downloaded yet,
    or with the Play Store "top" page when there is none. The queue length is
    printed before each step, including the final 0.
    """
    with db_session:
        q.extend(select(i.id for i in App if i.dl is None))
    if not q:
        q.append("https://play.google.com/store/apps/top")

    while q:
        print(len(q))
        target = q.popleft()
        # Collection cluster pages are dropped without being fetched.
        if 'collection/cluster' not in target:
            fetch(target)
    # Report the empty queue once more before returning.
    print(len(q))
# Start crawling only when this file is executed as a script.
if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import re
from entities import App, db_session, select
# Text searched for in a saved app page; the captured digits are taken as the
# app's download count.
pattern = r'Downloaded (\d+) times'
def get_downloaded(id):
    """Return the download count of app ``id`` as a string.

    The value cached in ``App.installed`` is returned when it is set.
    Otherwise the saved page ``apps/<id>`` is searched for ``pattern``; the
    captured number, or ``'0'`` when the page has no match, is stored in
    ``App.installed`` and returned.

    Raises whatever ``open`` raises if the saved page is missing.
    """
    with db_session:
        app = App[id]
        cached = app.installed
        if cached:
            return cached
        # The page is read as UTF-8 (assumed encoding of the saved HTML)
        # instead of the locale default; undecodable bytes are replaced so a
        # stray byte cannot abort the whole report.
        with open(f"apps/{id}", encoding="utf-8", errors="replace") as f:
            match = re.search(pattern, f.read())
        count = match.group(1) if match else '0'
        app.installed = count
        return count
def main():
    """Print download count and id for every downloaded app, largest first.

    Only rows keyed by a bare package id (not an ``https://`` URL) that have
    been downloaded are listed. Output is one tab-separated line per app.
    """
    with db_session:
        app_ids = select(i.id for i in App if not i.id.startswith('https://') and i.dl)[:]

    ranked = sorted(
        ((int(get_downloaded(app_id)), app_id) for app_id in app_ids),
        reverse=True,
    )
    for count, app_id in ranked:
        print(count, app_id, sep='\t')
# Produce the report only when this file is executed as a script.
if __name__ == '__main__':
    main()