Bimba.git

commit d22b05ce70b4894171eb23c4c59c73b65980c433

Author: Adam Pioterek <adam.pioterek@protonmail.ch>

line graphs

 database/bundle.pem | 83 ++++++++++++++++++++++++++++++++++++++++
 database/scraper.py | 97 +++++++++++++++++++++++++++++++++-------------


diff --git a/database/bundle.pem b/database/bundle.pem
new file mode 100644
index 0000000000000000000000000000000000000000..12585890132ce18cf3ebf0ac2819017cba1e3123
--- /dev/null
+++ b/database/bundle.pem
@@ -0,0 +1,83 @@
+-----BEGIN CERTIFICATE-----
+MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT
+MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i
+YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG
+EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg
+R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9
+9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq
+fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv
+iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU
+1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+
+bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW
+MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA
+ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l
+uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn
+Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS
+tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF
+PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un
+hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV
+5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw==
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIETTCCAzWgAwIBAgIDAjpxMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNVBAYTAlVT
+MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i
+YWwgQ0EwHhcNMTMxMjExMjM0NTUxWhcNMjIwNTIwMjM0NTUxWjBCMQswCQYDVQQG
+EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSUmFwaWRTU0wg
+U0hBMjU2IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1jBEgEu
+l9h9GKrIwuWF4hdsYC7JjTEFORoGmFbdVNcRjFlbPbFUrkshhTIWX1SG5tmx2GCJ
+a1i+ctqgAEJ2sSdZTM3jutRc2aZ/uyt11UZEvexAXFm33Vmf8Wr3BvzWLxmKlRK6
+msrVMNI4/Bk7WxU7NtBDTdFlodSLwWBBs9ZwF8w5wJwMoD23ESJOztmpetIqYpyg
+C04q18NhWoXdXBC5VD0tA/hJ8LySt7ecMcfpuKqCCwW5Mc0IW7siC/acjopVHHZD
+dvDibvDfqCl158ikh4tq8bsIyTYYZe5QQ7hdctUoOeFTPiUs2itP3YqeUFDgb5rE
+1RkmiQF1cwmbOwIDAQABo4IBSjCCAUYwHwYDVR0jBBgwFoAUwHqYaI2J+6sFZAwR
+fap9ZbjKzE4wHQYDVR0OBBYEFJfCJ1CewsnsDIgyyHyt4qYBT9pvMBIGA1UdEwEB
+/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgEGMDYGA1UdHwQvMC0wK6ApoCeGJWh0
+dHA6Ly9nMS5zeW1jYi5jb20vY3Jscy9ndGdsb2JhbC5jcmwwLwYIKwYBBQUHAQEE
+IzAhMB8GCCsGAQUFBzABhhNodHRwOi8vZzIuc3ltY2IuY29tMEwGA1UdIARFMEMw
+QQYKYIZIAYb4RQEHNjAzMDEGCCsGAQUFBwIBFiVodHRwOi8vd3d3Lmdlb3RydXN0
+LmNvbS9yZXNvdXJjZXMvY3BzMCkGA1UdEQQiMCCkHjAcMRowGAYDVQQDExFTeW1h
+bnRlY1BLSS0xLTU2OTANBgkqhkiG9w0BAQsFAAOCAQEANevhiyBWlLp6vXmp9uP+
+bji0MsGj21hWID59xzqxZ2nVeRQb9vrsYPJ5zQoMYIp0TKOTKqDwUX/N6fmS/Zar
+RfViPT9gRlATPSATGC6URq7VIf5Dockj/lPEvxrYrDrK3maXI67T30pNcx9vMaJR
+BBZqAOv5jUOB8FChH6bKOvMoPF9RrNcKRXdLDlJiG9g4UaCSLT+Qbsh+QJ8gRhVd
+4FB84XavXu0R0y8TubglpK9YCa81tGJUheNI3rzSkHp6pIQNo0LyUcDUrVNlXWz4
+Px8G8k/Ll6BKWcZ40egDuYVtLLrhX7atKz4lecWLVtXjCYDqwSfC2Q7sRwrp0Mr8
+2A==
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIGZTCCBU2gAwIBAgIQCWJPhs8z+oLr80YZnKUBTjANBgkqhkiG9w0BAQsFADBC
+MQswCQYDVQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMS
+UmFwaWRTU0wgU0hBMjU2IENBMB4XDTE3MDYyMjAwMDAwMFoXDTE4MDYyMjIzNTk1
+OVowHDEaMBgGA1UEAwwRd3d3Lnp0bS5wb3puYW4ucGwwggIiMA0GCSqGSIb3DQEB
+AQUAA4ICDwAwggIKAoICAQCn/bPI4eRB/t1KyfVyb7QtesUzDQ0xgqUg/I4SasJZ
+wj1G1fmkP9r7e1pACZ3SVrZwezqfa0dpRtSt6RYyqI70hdklLRAc4PsQb6ZWaOuz
+Lg118OK4vDBAono+AlD0vNEdeF1vunQHsGCKvZ4V9SrNv3v86xw1iHZB1E3zupXx
+xLrJSuD4D3HfUaiVwJnE0rF/2b2SF9WstSSczJm5b31uI8+CbCgThv380J/sMAKn
+0UKHaxd+qZWbDMIS0OLc6kdFnm2fNrmwiSNtnE6bMaZBq9igCA41hMriGgBH2h0q
+6AqlxK47DCDRi08H3DEBmi0mRmHdxu2AbEnHPPZuhOUvoHIwAiirI3Siivy2CLm1
+mBoHtrgK9OoID6bsSjhPG/zDBT4wd5QAOwNSQ9ncQy0lEyC3pQ80P4pVo8Oq0HlW
+BPmlUWfD7C7Dh+h0RMP+aN0KD9oDL7Fx9zNZBjqqYSuqczPZIcSPi7c9YP4g/c2r
+SgmhAl/4EDN6UVZy8lWyD178OL5GoeCxWaxspCry5h5PAMsr+sftgXx9/Z4ymi/8
+W6LUfeCmehmtxHWoTpCgT52Q9wQmqnZphj9KLNCqKpJRNV5YrAxc4ZDEJaxZI/ZS
+TFptJKVJstBKfeBw203gaye8U4xiVxrSLOJqEL65kjZg1LOSCn315iPH7zTR1xS2
+GQIDAQABo4ICezCCAncwHAYDVR0RBBUwE4IRd3d3Lnp0bS5wb3puYW4ucGwwCQYD
+VR0TBAIwADArBgNVHR8EJDAiMCCgHqAchhpodHRwOi8vZ3Auc3ltY2IuY29tL2dw
+LmNybDBvBgNVHSAEaDBmMGQGBmeBDAECATBaMCoGCCsGAQUFBwIBFh5odHRwczov
+L3d3dy5yYXBpZHNzbC5jb20vbGVnYWwwLAYIKwYBBQUHAgIwIAweaHR0cHM6Ly93
+d3cucmFwaWRzc2wuY29tL2xlZ2FsMB8GA1UdIwQYMBaAFJfCJ1CewsnsDIgyyHyt
+4qYBT9pvMA4GA1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYB
+BQUHAwIwVwYIKwYBBQUHAQEESzBJMB8GCCsGAQUFBzABhhNodHRwOi8vZ3Auc3lt
+Y2QuY29tMCYGCCsGAQUFBzAChhpodHRwOi8vZ3Auc3ltY2IuY29tL2dwLmNydDCC
+AQMGCisGAQQB1nkCBAIEgfQEgfEA7wB2AN3rHSt6DU+mIIuBrYFocH4ujp0B1VyI
+jT0RxM227L7MAAABXM9F02oAAAQDAEcwRQIgXZ3Ri+/TqCOBrN0h1bgZMeaMUGCq
+Z+X3x32HnilBFqwCIQDYH4+Vkc5PKYGU8zwSDPa5flQlOrnQSoZ1MmGzJzzWeAB1
+AKS5CZC0GFgUh7sTosxncAo8NZgE+RvfuON3zQ7IDdwQAAABXM9F06EAAAQDAEYw
+RAIgHPwBJvn48rGK4XlKWYDaoA2JYu0ncaQVyJTOsccc64MCIGjc42DqWCyhSUAA
+TMFz6GVsglqfvv90bCk3yEoLyEQoMA0GCSqGSIb3DQEBCwUAA4IBAQCmXR1Pmo8J
+uMqVJe6NXklEmoFwM1UJ4td8fLrwMEYwaknFSmaB6JkjOS5cZ3Segb9XBY4sT9Ya
+AvF2kdOKA339UaSd+yimYYLt/KhO1nnYlNKrfGcjTnybuyczUDxsYR4N8FbTI9Yr
+Z0baIlbVmSax9tjxwFMLWkgcd6tbwzCs+XPjvKMwQgxfuu14a5lI9EMsE1jjgxpa
+jhedn5fNtKZdgaY9NVOsTkPSw79CzQfgSamSONyWaT949maFRe//sZPBOnE4jTBb
+VJrNbfm+H+NFv7bF1Js2xPkgJuGkuKVc1H1qoJX4lf1vfpaZSSmzbe2O6GXHKF4y
+MKlNUKhgoBO4
+-----END CERTIFICATE-----




diff --git a/database/scraper.py b/database/scraper.py
index 0fdf7831ba4a63bf697c6a73a2ae789a9c2da2d4..6475d82f51b09be68be5c9a9fe89efc903f174a0 100755
--- a/database/scraper.py
+++ b/database/scraper.py
@@ -83,34 +83,19 @@         directions = soup.findAll(attrs={'class': re.compile(r'.*\baccordion-item\b.*')})
         routes = {}
         for direction in directions:
             direction_id = direction['data-directionid']
+
             route = [{'id': stop.find('a')['data-stopid'], 'name': stop['data-name'],
-                      'onDemand': re.search('stop-onDemand', str(stop['class'])) != None}
+                      'onDemand': re.search('stop-onDemand', str(stop['class'])) != None,
+                      'variant_type': re.search('variant-type-(in|out)', str(stop['class'])).groups()[0]\
+                              if re.search('variant-type-(in|out)', str(stop['class'])) is not None else None,
+                      'variant_first': re.search('first-variant', str(stop['class'])) is not None,
+                      'variant_last': re.search('last-variant', str(stop['class'])) is not None}
                      for stop in direction.findAll(attrs={'class': re.compile(r'.*\bstop-itm\b.*')})]
             routes[direction_id] = route
         return routes
 
 
     def __get_stop_times(self, stop_id, line_id, direction_id):
-        """
-        get timetable
-        """
-
-        """ todo get time to next stop:
-            <div class="route-timeline">
-            <ul>
-            <li…>
-            <span class="stop-title">{current node_name} (n/ż)?</span>    --> if not present, return None
-            …
-            </li>
-            <li…>
-            …
-            <span class="time">{time:INT}'</span>
-            </li>
-            </ul>
-            </div>
-
-        """
-
         index = self.__post('https://www.ztm.poznan.pl/goeuropa-api/stop-info/{}/{}'.
                                   format(stop_id, line_id), {'directionId': direction_id})
         soup = BeautifulSoup(index.text, 'html.parser')
@@ -171,6 +156,25 @@             self.session = requests.session()
             return self.session.post(url, data=data, verify='bundle.pem')
 
 
+    # TODO: take the parent stop into account (for variant stops this also requires synchronised departure times)
+    @staticmethod
+    def __calculate_time_to_next_stop(times, last_time_of_arrival):
+        times.sort()
+        earliest_departure = times[0]
+        if last_time_of_arrival == "":
+            return None, earliest_departure
+
+        hour = int(earliest_departure[:2])
+        minute = int(earliest_departure[3:])
+        minute = minute + (60 * hour)
+
+        last_hour = int(last_time_of_arrival[:2])
+        last_minute = int(last_time_of_arrival[3:])
+        last_minute = last_minute + (60 * last_hour)
+
+        time_to_next_stop = minute - last_minute
+        return time_to_next_stop, earliest_departure
+
     def download(self):
         """
         main function
@@ -196,9 +200,10 @@                 cursor.execute('create table stops(id TEXT PRIMARY KEY, symbol TEXT \
                                 references node(symbol), number TEXT, lat REAL, lon REAL, \
                                 headsigns TEXT)')
                 cursor.execute('create table lines(id TEXT PRIMARY KEY, number TEXT)')
-                cursor.execute('create table timetables(id TEXT PRIMARY KEY, stop_id TEXT references \
-                                stop(id), line_id TEXT references line(id), headsign TEXT, \
-                                numberInRoute INTEGER)')
+                cursor.execute('create table timetables(id TEXT PRIMARY KEY, stop_id TEXT \
+                                references stop(id), line_id TEXT references line(id), \
+                                headsign TEXT, parent TEXT references id, \
+                                parent_variant TEXT references id)')
                 cursor.execute('create table departures(id INTEGER PRIMARY KEY, \
                                 timetable_id TEXT references timetable(id), \
                                 hour INTEGER, minute INTEGER, mode TEXT, \
@@ -227,18 +232,56 @@                     route = self.__get_route(line_id)
                     route_i = 1
                     for direction, stops in route.items():
                         stop_i = 1
-                        for stop in stops:
+                        parent_stop = None
+                        parent_stop_variant = None
+                        for stop in stops[:-1]:
                             if self.verbose:
                                 print("stop {} in route {} in line {}".format(stop_i, route_i, line_i))
                             timetables = self.__get_stop_times(stop['id'], line_id, direction)
-                            cursor.execute('insert into timetables values(?, ?, ?, ?, ?)',
-                                           (timetable_id, stop['id'], line_id, stops[-1]['name'], stop_i))
+                            
+                            if stop_i == 1 and stop['variant_type'] is None:
+                                if self.verbose:
+                                    print('stop1 & main')
+                                parent = None
+                                parent_variant = None
+                                parent_stop = stop['id']
+                            elif stop['variant_type'] == 'in' and stop['variant_first']:
+                                if self.verbose:
+                                    print('in & first')
+                                parent = None
+                                parent_variant = None
+                                parent_stop_variant = stop['id']
+                            elif stop_i > 1 and stop['variant_type'] is None:
+                                if self.verbose:
+                                    print('stop>1 & main')
+                                parent = parent_stop
+                                parent_variant = parent_stop_variant
+                                parent_stop = stop['id']
+                                parent_stop_variant = None
+                            elif stop['variant_type'] is not None and not stop['variant_first']:
+                                if self.verbose:
+                                    print('variant & not first')
+                                parent = None
+                                parent_variant = parent_stop_variant
+                                parent_stop_variant = stop['id']
+                            elif stop['variant_type'] == 'out' and stop['variant_first']:
+                                if self.verbose:
+                                    print('out & first')
+                                parent = None
+                                parent_variant = parent_stop
+                                parent_stop_variant = stop['id']
+                            if stop['variant_type'] == 'out' and stop['variant_last']:
+                                parent_stop_variant = None
+
+                            cursor.execute('insert into timetables values(?, ?, ?, ?, ?, ?)',
+                                           (timetable_id, stop['id'], line_id, stops[-1]['name'], parent, parent_variant))
                             for mode, times in timetables.items():
                                 cursor.executemany('insert into departures values(null, ?, ?, ?, ?, ?, \
                                                     ?)', [(timetable_id, hour, minute, mode, lowfloor, desc)
                                                           for hour, minute, desc, lowfloor in times])
                             stop_i += 1
                             timetable_id += 1
+
                         route_i += 1
                     line_i += 1
             except KeyboardInterrupt: