
commit 84baea2e358831b29e22b269f8f578abc6111e26

Author: Adam Pioterek <>

database directory

 .gitignore | 9 ++-------
 database/cron | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 database/db.schema | 30 ++++++++++++++++++++++++++++++
  | 0 

diff --git a/.gitignore b/.gitignore
index 03e31c52b899a1f7d7fae329bddd1c922968b798..f59b45660e95a0a997e5ddfb6988bdd928658004 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,10 +45,5 @@ /captures

diff --git a/database/cron b/database/cron
new file mode 100644
index 0000000000000000000000000000000000000000..36b7f43366ebdccab4f9f4e6d8221507fca495de
--- /dev/null
+++ b/database/cron
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Cron job: run the timetable scraper and, when it produced a sound database,
+# publish the xz-compressed database together with its metadata file.
+#
+# ERR-trap handler: best-effort removal of half-made artefacts.
+# Defined first so that it already exists when the trap below can fire.
+function except {
+    (rm /home/qeebmisk/timetable.db; rm /home/qeebmisk/new_meta) || :
+}
+d=$(date -Iminutes)
+# Every path below is relative to this directory; do not carry on elsewhere.
+cd /home/qeebmisk || exit 1
+set +e
+# NOTE(review): no script path follows the interpreter on this line, so
+# python would read its program from stdin -- the scraper's file name looks
+# elided; confirm and restore it.
+/usr/bin/python3.6 >new_meta 2> "scraper_$d.log"
+# Remember the scraper's status before any other command overwrites $?.
+result=$?
+if [ "$result" -eq 0 ]
+then
+    # integrity_check reports problems as output rows, not via the exit
+    # status: a healthy database yields the single row "ok".
+    integrity=$(/usr/bin/sqlite3 timetable.db 'pragma integrity_check;' 2>&1)
+    if [ "$integrity" = 'ok' ]
+    then
+        trap except ERR
+        echo 'making metadata' >> "scraper_$d.log"
+        # Metadata layout: scraper stdout, sha512 of the db, db size in
+        # bytes, a version string, name of the compressed file.
+        mv new_meta timetable.db.meta
+        sha512sum timetable.db | cut -d ' ' -f 1 >>timetable.db.meta
+        ls -l timetable.db | cut -d ' ' -f 5 >>timetable.db.meta
+        echo '1.1.0' >>timetable.db.meta
+        echo 'timetable.db.xz' >>timetable.db.meta
+        # -k keeps the uncompressed timetable.db in place.
+        xz -z -k timetable.db
+        echo 'moving' >> "scraper_$d.log"
+        mv timetable.db.xz public_html/w/data/media/programmes/bimba/timetable.db.xz
+        mv timetable.db.meta public_html/w/data/media/programmes/bimba/timetable.db.meta
+        echo 'success' >> "scraper_$d.log"
+    else
+        echo 'db integrity check failed' >> "scraper_$d.log"
+        echo "$integrity" >> "scraper_$d.log"
+        rm timetable.db
+        rm new_meta
+    fi
+# 48 is 304 truncated to the 8 bits of a process exit status
+# (presumably the scraper's "not modified" result -- confirm).
+elif [ "$result" -eq 48 ]
+then
+    echo 'db is still valid' >> "scraper_$d.log"
+    rm new_meta
+else
+    echo 'scraper failed' >> "scraper_$d.log"
+    rm timetable.db
+    rm new_meta
+fi

diff --git a/database/db.schema b/database/db.schema
new file mode 100644
index 0000000000000000000000000000000000000000..2f7de0bddd67bb1dfb80e9e8504c1b1dcf385eb6
--- /dev/null
+++ b/database/db.schema
@@ -0,0 +1,30 @@
+nodes             stops
+-------           ---------
+#symbol <____     #id <_________
+name         |___ symbol        |
+                  number        |
+                  lat           |
+                  lon           |
+                  headsigns     |
+                                |
+lines             timetables    |
+------            ----------    |
+#id <______       #id <________/|\__
+number     |      stop_id ______|   |
+           |_____ line_id           |
+                  headsign          |
+                                    |
+                  departures        |
+                  ----------        |
+                  #id               |
+                  timetable_id _____|
+                  hour
+                  minute
+                  mode
+                  lowFloor
+                  modification

diff --git a/database/ b/database/
new file mode 100755
index 0000000000000000000000000000000000000000..7109b9d730a39c50dfc4babceef440dcbcb85020
--- /dev/null
+++ b/database/
@@ -0,0 +1,237 @@
+# js interface:
+# stops in node:{node:symbol}
+# bike stations:
+import json
+import os
+import re
+import sqlite3
+import sys
+import requests
+from bs4 import BeautifulSoup
+class TimetableDownloader:
+    """
+    Scrape a public-transport timetable site into a local SQLite database.
+
+    One instance wraps a single ``requests`` session; ``download()`` is the
+    entry point and writes ``timetable.db`` in the current working directory.
+
+    NOTE(review): every URL literal handed to ``__get``/``__post`` in this
+    revision is empty (or only a ``'{}'`` placeholder) -- the endpoints look
+    elided and must be restored before this can talk to a real server.
+    """
+    def __init__(self, verbose):
+        """
+        :param verbose: when true, print progress for every node and stop
+        """
+        self.session = requests.session()
+        self.verbose = verbose
+    def __get_validity(self):
+        """
+        Return the validity stamp of the timetable currently published.
+
+        The stamp is the 8-digit ``value`` of the ``<option>`` element that
+        is marked ``selected`` in the fetched page.
+
+        :raises ValueError: if the page contains no such option
+        """
+        index = self.__get('')
+        match = re.search('<option value="[0-9]{8}" selected', index.text)
+        if match is None:
+            raise ValueError('no selected validity option found in the page')
+        return match.group().split('"')[1]
+    def __get_nodes(self):
+        """
+        Return ``(symbol, name)`` tuples, one per entry of the JSON response.
+        """
+        index = self.__get('')
+        return [(node['symbol'], node['name']) for node in json.loads(index.text)]
+    def __get_stops(self, node):
+        """
+        Return the stops of one node as rows for the ``stops`` table.
+
+        :param node: node symbol, substituted into the request URL
+        :return: list of ``(id, node, number, lat, lon, headsigns)`` tuples
+        """
+        index = self.__get('{}'.format(node))
+        stops = []
+        for item in json.loads(index.text):
+            stop = item['stop']
+            # the stop number is the first run of digits in the stop symbol
+            number = re.findall(r'\d+', stop['symbol'])[0]
+            directions = ', '.join('{} → {}'.format(transfer['name'], transfer['headsign'])
+                                   for transfer in item['transfers'])
+            stops.append((stop['id'], node, number, stop['lat'], stop['lon'], directions))
+        return stops
+    def __get_lines(self):
+        """
+        Return a dict mapping each line's ``data-lineid`` to its text.
+        """
+        index = self.__get('')
+        soup = BeautifulSoup(index.text, 'html.parser')
+        return {line['data-lineid']: line.text for line in
+                soup.findAll(attrs={'class': re.compile(r'.*\blineNo-bt\b.*')})}
+    def __get_route(self, line_id):
+        """
+        Return the routes of one line.
+
+        :param line_id: line id, substituted into the request URL
+        :return: dict mapping direction id to a list of stop dicts with the
+                 keys ``id``, ``name`` and ``onDemand``
+        """
+        index = self.__get('{}'.format(line_id))
+        soup = BeautifulSoup(index.text, 'html.parser')
+        routes = {}
+        for direction in soup.findAll(attrs={'class': re.compile(r'.*\baccordion-item\b.*')}):
+            # onDemand is a substring test on the stringified class list
+            routes[direction['data-directionid']] = [
+                {'id': stop.find('a')['data-stopid'], 'name': stop['data-name'],
+                 'onDemand': 'stop-onDemand' in str(stop['class'])}
+                for stop in direction.findAll(attrs={'class': re.compile(r'.*\bstop-itm\b.*')})]
+        return routes
+    def __get_stop_times(self, stop_id, line_id, direction_id):
+        """
+        Return the departures of one line at one stop in one direction.
+
+        :return: dict mapping mode name (``data-mode``) to a list of
+                 ``(hour, minute, description, lowFloor)`` tuples
+        """
+        index = self.__post('{}/{}'.format(stop_id, line_id), {'directionId': direction_id})
+        soup = BeautifulSoup(index.text, 'html.parser')
+        legends = {}
+        for row in soup.find(attrs={'class': re.compile(r'.*\blegend-box\b.*')}).findAll('li'):
+            # "<symbol> - <meaning>"; only the first dash separates, so the
+            # meaning may itself contain dashes (or be absent altogether)
+            symbol, _, meaning = row.text.partition('-')
+            symbol = symbol.rstrip()
+            if symbol != '_':
+                legends[symbol] = meaning.lstrip()
+        schedules = {}
+        for mode in soup.findAll(attrs={'class': re.compile(r'.*\bmode-tab\b.*')}):
+            hours = mode.find(attrs={'class': re.compile(r'.*\bscheduler-hours\b.*')})
+            # keyed by the hour label: a repeated label keeps its last row only
+            by_hour = {row.find('th').text: row.findAll('a') for row in hours.findAll('tr')}
+            schedules[mode['data-mode']] = [
+                (hour, *self.__describe(dep.text, legends), 'n-line' in str(dep['class']))
+                for hour, deps in by_hour.items() for dep in deps]
+        return schedules
+    @staticmethod
+    def __describe(dep_time, legend):
+        """
+        Split a departure cell into its minute and its legend descriptions.
+
+        Trailing characters are stripped one at a time until only digits are
+        left; each stripped character found in ``legend`` contributes its
+        description (commas and unknown characters are dropped).
+
+        :param dep_time: cell text, digits followed by optional markers
+        :param legend: dict mapping marker character to its description
+        :return: ``(minute, description)``, descriptions joined with ``'; '``
+        :raises ValueError: if ``dep_time`` does not start with a digit
+        """
+        desc = []
+        while dep_time and re.match(r'^\d+$', dep_time) is None:
+            marker = dep_time[-1]
+            if marker != ',' and marker in legend:
+                desc.append(legend[marker])
+            dep_time = dep_time[:-1]
+        if not dep_time:
+            raise ValueError('departure text has no leading minute digits')
+        return (int(dep_time), '; '.join(desc))
+    def __get(self, url):
+        """
+        GET ``url``, retrying once on a fresh session after a request error.
+
+        TLS certificates are verified against the ``bundle.pem`` file.
+        """
+        try:
+            return self.session.get(url, verify='bundle.pem')
+        except requests.exceptions.RequestException:
+            # only transport-level failures are retried; KeyboardInterrupt
+            # and programming errors propagate to the caller
+            self.session = requests.session()
+            return self.session.get(url, verify='bundle.pem')
+    def __post(self, url, data):
+        """
+        POST ``data`` to ``url``, retrying once on a fresh session after a
+        request error.
+
+        TLS certificates are verified against the ``bundle.pem`` file.
+        """
+        try:
+            return self.session.post(url, data=data, verify='bundle.pem')
+        except requests.exceptions.RequestException:
+            self.session = requests.session()
+            return self.session.post(url, data=data, verify='bundle.pem')
+    @staticmethod
+    def __create_schema(cursor):
+        """
+        Create the empty tables that ``download()`` fills.
+        """
+        # Foreign keys name the real (plural) tables. SQLite accepts a
+        # reference to a missing parent table at CREATE time, so a misspelt
+        # parent only surfaces once foreign-key enforcement is switched on.
+        cursor.execute('create table metadata(key TEXT PRIMARY KEY, value TEXT)')
+        cursor.execute('create table nodes(symbol TEXT PRIMARY KEY, name TEXT)')
+        cursor.execute('create table stops(id TEXT PRIMARY KEY, '
+                       'symbol TEXT references nodes(symbol), number TEXT, '
+                       'lat REAL, lon REAL, headsigns TEXT)')
+        cursor.execute('create table lines(id TEXT PRIMARY KEY, number TEXT)')
+        cursor.execute('create table timetables(id TEXT PRIMARY KEY, '
+                       'stop_id TEXT references stops(id), '
+                       'line_id TEXT references lines(id), headsign TEXT)')
+        cursor.execute('create table departures(id INTEGER PRIMARY KEY, '
+                       'timetable_id TEXT references timetables(id), '
+                       'hour INTEGER, minute INTEGER, mode TEXT, '
+                       'lowFloor INTEGER, modification TEXT)')
+    def __fill(self, cursor, validity):
+        """
+        Create the schema and insert everything that gets scraped.
+
+        :param cursor: cursor on the freshly created database
+        :param validity: validity stamp if already fetched, else ``None``
+        """
+        self.__create_schema(cursor)
+        if validity is None:
+            validity = self.__get_validity()
+        # the stamp is the only unconditional line written to stdout
+        print(validity)
+        sys.stdout.flush()
+        cursor.execute("insert into metadata values('validFrom', ?)", (validity,))
+        nodes = self.__get_nodes()
+        cursor.executemany('insert into nodes values(?, ?)', nodes)
+        for node_i, (symbol, _) in enumerate(nodes, 1):
+            if self.verbose:
+                print('node {}'.format(node_i))
+            cursor.executemany('insert into stops values(?, ?, ?, ?, ?, ?)',
+                               self.__get_stops(symbol))
+        lines = self.__get_lines()
+        cursor.executemany('insert into lines values(?, ?)', lines.items())
+        # one timetable row per (line, direction, stop); ids run from 1
+        timetable_id = 1
+        for line_i, line_id in enumerate(lines, 1):
+            routes = self.__get_route(line_id)
+            for route_i, (direction, stops) in enumerate(routes.items(), 1):
+                for stop_i, stop in enumerate(stops, 1):
+                    if self.verbose:
+                        print("stop {} in route {} in line {}".format(stop_i, route_i, line_i))
+                    timetables = self.__get_stop_times(stop['id'], line_id, direction)
+                    # the headsign is the name of the route's last stop
+                    cursor.execute('insert into timetables values(?, ?, ?, ?)',
+                                   (timetable_id, stop['id'], line_id, stops[-1]['name']))
+                    for mode, times in timetables.items():
+                        cursor.executemany(
+                            'insert into departures values(null, ?, ?, ?, ?, ?, ?)',
+                            [(timetable_id, hour, minute, mode, lowfloor, desc)
+                             for hour, minute, desc, lowfloor in times])
+                    timetable_id += 1
+    def download(self):
+        """
+        Build ``timetable.db`` unless the existing one is still current.
+
+        An existing database is kept when the published validity stamp is not
+        newer than its stored ``validFrom``; otherwise it is deleted and
+        rebuilt from scratch.
+
+        :return: 0 after a complete download, 304 when the existing database
+                 is still valid, 404 when interrupted from the keyboard
+        """
+        validity = None
+        if os.path.exists('timetable.db'):
+            connection = sqlite3.connect('timetable.db')
+            try:
+                cursor = connection.cursor()
+                cursor.execute("select value from metadata where key = 'validFrom'")
+                current_valid_from = cursor.fetchone()[0]
+                cursor.close()
+            finally:
+                connection.close()
+            # fetched once here and reused below when a rebuild is needed
+            validity = self.__get_validity()
+            if validity <= current_valid_from:
+                return 304
+            os.remove('timetable.db')
+        connection = sqlite3.connect('timetable.db')
+        try:
+            # "with connection" commits on a normal exit (including the
+            # early return below) and rolls back when an exception escapes
+            with connection:
+                try:
+                    self.__fill(connection.cursor(), validity)
+                except KeyboardInterrupt:
+                    return 404
+        finally:
+            connection.close()
+        return 0
+if __name__ == '__main__':
+    # "-v" as the first command-line argument switches on progress output.
+    verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'
+    downloader = TimetableDownloader(verbose)
+    # download()'s return value becomes the process exit status; only its low
+    # 8 bits reach a POSIX shell, so 304 is seen there as 48.
+    sys.exit(downloader.download())

