Bimba.git

commit d5d588cdf298bcfd08a568cc25fe9754c2677f4b

Author: Adam Pioterek <adam.pioterek@protonmail.ch>

incremental scraper

 research/scraper.py | 16 +++-------------


diff --git a/research/scraper.py b/research/scraper.py
index 37dd35d799fddc496ec0b2f9e1816f63785707d9..10272c77614a11607d4706af4eb777e687d3e6f1 100755
--- a/research/scraper.py
+++ b/research/scraper.py
@@ -171,7 +171,6 @@     if os.path.exists('timetable.db'):
         updating = True
 
 
-    print(time.time())
     with sqlite3.connect('timetable.db') as connection:
         try:
             cursor = connection.cursor()
@@ -181,7 +180,6 @@                 current_valid_from = cursor.fetchone()[0]
                 if get_validity() <= current_valid_from:
                     return 304
             else:
-                print('creating tables')
                 cursor.execute('create table metadata(key TEXT PRIMARY KEY, value TEXT)')
                 cursor.execute('create table checksums(checksum TEXT, for TEXT, id TEXT)')
                 cursor.execute('create table nodes(symbol TEXT PRIMARY KEY, name TEXT)')
@@ -196,10 +194,10 @@                                 timetable_id TEXT references timetable(id), \
                                 hour INTEGER, minute INTEGER, mode TEXT, \
                                 lowFloor INTEGER, modification TEXT)')
 
-            print('getting validity')
             cursor.execute("delete from metadata where key = 'validFrom'")
-            cursor.execute("insert into metadata values('validFrom', ?)", (get_validity(),))
-            print('getting nodes')
+            validity = get_validity()
+            print(validity)
+            cursor.execute("insert into metadata values('validFrom', ?)", (validity,))
             cursor.execute("select checksum from checksums where for = 'nodes'")
             checksum = cursor.fetchone()
             if checksum != None:
@@ -219,10 +217,8 @@                 cursor.execute('select * from nodes')
                 nodes = cursor.fetchall()
                 nodes = [(sym, nam) for sym, nam, _ in nodes]
             nodes_no = len(nodes)
-            print('getting stops')
             node_i = 1
             for symbol, _ in nodes:
-                print('\rnode {}/{}'.format(node_i, nodes_no), end='')
                 sys.stdout.flush()
                 cursor.execute("select checksum from checksums where for = 'node' and id = ?", (symbol,))
                 checksum = cursor.fetchone()
@@ -238,7 +234,6 @@                     cursor.executemany('insert into stops values(?, ?, ?, ?, ?, ?)', stops)
                     cursor.execute("update checksums set checksum = ? where for = 'node' and id = ?", (checksum, symbol))
                     changed = True
                 node_i += 1
-            print('\ngetting lines')
             cursor.execute("select checksum from checksums where for = 'lines'")
             checksum = cursor.fetchone()
             if checksum != None:
@@ -268,8 +263,6 @@                     stops_no = len(stops)
                     stop_i = 1
                     for stop in stops:
                         timetable_id = secrets.token_hex(4)
-                        print('line {}/{} route {}/{} stop {}/{}'.
-                              format(line_i, lines_no, route_i, routes_no, stop_i, stops_no), end='')
                         sys.stdout.flush()
                         cursor.execute("select checksum from checksums where for = 'timetable' and id = ?", (timetable_id,))
                         checksum = cursor.fetchone()
@@ -293,14 +286,11 @@                                 cursor.executemany('insert into departures values(null, ?, ?, ?, ?, ?, \
                                                     ?)', [(timetable_id, hour, minute, mode, lowfloor, desc)
                                                           for hour, minute, desc, lowfloor in times])
                         stop_i += 1
-                        print('{}\r'.format(' '*35), end='')
                         sys.stdout.flush()
                     route_i += 1
-                print('')
                 line_i += 1
         except KeyboardInterrupt:
             return 404
-    print(time.time())
     if changed:
         return 0
     return 304