Author: Adam Pioterek <adam.pioterek@protonmail.ch>
incremental scraper
research/scraper.py | 16 +++-------------
diff --git a/research/scraper.py b/research/scraper.py index 37dd35d799fddc496ec0b2f9e1816f63785707d9..10272c77614a11607d4706af4eb777e687d3e6f1 100755 --- a/research/scraper.py +++ b/research/scraper.py @@ -171,7 +171,6 @@ if os.path.exists('timetable.db'): updating = True - print(time.time()) with sqlite3.connect('timetable.db') as connection: try: cursor = connection.cursor() @@ -181,7 +180,6 @@ current_valid_from = cursor.fetchone()[0] if get_validity() <= current_valid_from: return 304 else: - print('creating tables') cursor.execute('create table metadata(key TEXT PRIMARY KEY, value TEXT)') cursor.execute('create table checksums(checksum TEXT, for TEXT, id TEXT)') cursor.execute('create table nodes(symbol TEXT PRIMARY KEY, name TEXT)') @@ -196,10 +194,10 @@ timetable_id TEXT references timetable(id), \ hour INTEGER, minute INTEGER, mode TEXT, \ lowFloor INTEGER, modification TEXT)') - print('getting validity') cursor.execute("delete from metadata where key = 'validFrom'") - cursor.execute("insert into metadata values('validFrom', ?)", (get_validity(),)) - print('getting nodes') + validity = get_validity() + print(validity) + cursor.execute("insert into metadata values('validFrom', ?)", (validity,)) cursor.execute("select checksum from checksums where for = 'nodes'") checksum = cursor.fetchone() if checksum != None: @@ -219,10 +217,8 @@ cursor.execute('select * from nodes') nodes = cursor.fetchall() nodes = [(sym, nam) for sym, nam, _ in nodes] nodes_no = len(nodes) - print('getting stops') node_i = 1 for symbol, _ in nodes: - print('\rnode {}/{}'.format(node_i, nodes_no), end='') sys.stdout.flush() cursor.execute("select checksum from checksums where for = 'node' and id = ?", (symbol,)) checksum = cursor.fetchone() @@ -238,7 +234,6 @@ cursor.executemany('insert into stops values(?, ?, ?, ?, ?, ?)', stops) cursor.execute("update checksums set checksum = ? 
where for = 'node' and id = ?", (checksum, symbol)) changed = True node_i += 1 - print('\ngetting lines') cursor.execute("select checksum from checksums where for = 'lines'") checksum = cursor.fetchone() if checksum != None: @@ -268,8 +263,6 @@ stops_no = len(stops) stop_i = 1 for stop in stops: timetable_id = secrets.token_hex(4) - print('line {}/{} route {}/{} stop {}/{}'. - format(line_i, lines_no, route_i, routes_no, stop_i, stops_no), end='') sys.stdout.flush() cursor.execute("select checksum from checksums where for = 'timetable' and id = ?", (timetable_id,)) checksum = cursor.fetchone() @@ -293,14 +286,11 @@ cursor.executemany('insert into departures values(null, ?, ?, ?, ?, ?, \ ?)', [(timetable_id, hour, minute, mode, lowfloor, desc) for hour, minute, desc, lowfloor in times]) stop_i += 1 - print('{}\r'.format(' '*35), end='') sys.stdout.flush() route_i += 1 - print('') line_i += 1 except KeyboardInterrupt: return 404 - print(time.time()) if changed: return 0 return 304