Author: Adam Pioterek <adam.pioterek@protonmail.ch>
line graphs
database/bundle.pem | 83 ++++++++++++++++++++++++++++++++++++++++ database/scraper.py | 97 +++++++++++++++++++++++++++++++++-------------
diff --git a/database/bundle.pem b/database/bundle.pem new file mode 100644 index 0000000000000000000000000000000000000000..12585890132ce18cf3ebf0ac2819017cba1e3123 --- /dev/null +++ b/database/bundle.pem @@ -0,0 +1,83 @@ +-----BEGIN CERTIFICATE----- +MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT +MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i +YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG +EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg +R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9 +9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq +fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv +iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU +1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+ +bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW +MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA +ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l +uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn +Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS +tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF +PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un +hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV +5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw== +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIETTCCAzWgAwIBAgIDAjpxMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNVBAYTAlVT +MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i +YWwgQ0EwHhcNMTMxMjExMjM0NTUxWhcNMjIwNTIwMjM0NTUxWjBCMQswCQYDVQQG +EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSUmFwaWRTU0wg +U0hBMjU2IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1jBEgEu +l9h9GKrIwuWF4hdsYC7JjTEFORoGmFbdVNcRjFlbPbFUrkshhTIWX1SG5tmx2GCJ +a1i+ctqgAEJ2sSdZTM3jutRc2aZ/uyt11UZEvexAXFm33Vmf8Wr3BvzWLxmKlRK6 +msrVMNI4/Bk7WxU7NtBDTdFlodSLwWBBs9ZwF8w5wJwMoD23ESJOztmpetIqYpyg +C04q18NhWoXdXBC5VD0tA/hJ8LySt7ecMcfpuKqCCwW5Mc0IW7siC/acjopVHHZD +dvDibvDfqCl158ikh4tq8bsIyTYYZe5QQ7hdctUoOeFTPiUs2itP3YqeUFDgb5rE +1RkmiQF1cwmbOwIDAQABo4IBSjCCAUYwHwYDVR0jBBgwFoAUwHqYaI2J+6sFZAwR +fap9ZbjKzE4wHQYDVR0OBBYEFJfCJ1CewsnsDIgyyHyt4qYBT9pvMBIGA1UdEwEB +/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgEGMDYGA1UdHwQvMC0wK6ApoCeGJWh0 +dHA6Ly9nMS5zeW1jYi5jb20vY3Jscy9ndGdsb2JhbC5jcmwwLwYIKwYBBQUHAQEE +IzAhMB8GCCsGAQUFBzABhhNodHRwOi8vZzIuc3ltY2IuY29tMEwGA1UdIARFMEMw +QQYKYIZIAYb4RQEHNjAzMDEGCCsGAQUFBwIBFiVodHRwOi8vd3d3Lmdlb3RydXN0 +LmNvbS9yZXNvdXJjZXMvY3BzMCkGA1UdEQQiMCCkHjAcMRowGAYDVQQDExFTeW1h +bnRlY1BLSS0xLTU2OTANBgkqhkiG9w0BAQsFAAOCAQEANevhiyBWlLp6vXmp9uP+ +bji0MsGj21hWID59xzqxZ2nVeRQb9vrsYPJ5zQoMYIp0TKOTKqDwUX/N6fmS/Zar +RfViPT9gRlATPSATGC6URq7VIf5Dockj/lPEvxrYrDrK3maXI67T30pNcx9vMaJR +BBZqAOv5jUOB8FChH6bKOvMoPF9RrNcKRXdLDlJiG9g4UaCSLT+Qbsh+QJ8gRhVd +4FB84XavXu0R0y8TubglpK9YCa81tGJUheNI3rzSkHp6pIQNo0LyUcDUrVNlXWz4 +Px8G8k/Ll6BKWcZ40egDuYVtLLrhX7atKz4lecWLVtXjCYDqwSfC2Q7sRwrp0Mr8 +2A== +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIGZTCCBU2gAwIBAgIQCWJPhs8z+oLr80YZnKUBTjANBgkqhkiG9w0BAQsFADBC +MQswCQYDVQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMS +UmFwaWRTU0wgU0hBMjU2IENBMB4XDTE3MDYyMjAwMDAwMFoXDTE4MDYyMjIzNTk1 +OVowHDEaMBgGA1UEAwwRd3d3Lnp0bS5wb3puYW4ucGwwggIiMA0GCSqGSIb3DQEB +AQUAA4ICDwAwggIKAoICAQCn/bPI4eRB/t1KyfVyb7QtesUzDQ0xgqUg/I4SasJZ +wj1G1fmkP9r7e1pACZ3SVrZwezqfa0dpRtSt6RYyqI70hdklLRAc4PsQb6ZWaOuz +Lg118OK4vDBAono+AlD0vNEdeF1vunQHsGCKvZ4V9SrNv3v86xw1iHZB1E3zupXx +xLrJSuD4D3HfUaiVwJnE0rF/2b2SF9WstSSczJm5b31uI8+CbCgThv380J/sMAKn +0UKHaxd+qZWbDMIS0OLc6kdFnm2fNrmwiSNtnE6bMaZBq9igCA41hMriGgBH2h0q +6AqlxK47DCDRi08H3DEBmi0mRmHdxu2AbEnHPPZuhOUvoHIwAiirI3Siivy2CLm1 +mBoHtrgK9OoID6bsSjhPG/zDBT4wd5QAOwNSQ9ncQy0lEyC3pQ80P4pVo8Oq0HlW +BPmlUWfD7C7Dh+h0RMP+aN0KD9oDL7Fx9zNZBjqqYSuqczPZIcSPi7c9YP4g/c2r +SgmhAl/4EDN6UVZy8lWyD178OL5GoeCxWaxspCry5h5PAMsr+sftgXx9/Z4ymi/8 +W6LUfeCmehmtxHWoTpCgT52Q9wQmqnZphj9KLNCqKpJRNV5YrAxc4ZDEJaxZI/ZS +TFptJKVJstBKfeBw203gaye8U4xiVxrSLOJqEL65kjZg1LOSCn315iPH7zTR1xS2 +GQIDAQABo4ICezCCAncwHAYDVR0RBBUwE4IRd3d3Lnp0bS5wb3puYW4ucGwwCQYD +VR0TBAIwADArBgNVHR8EJDAiMCCgHqAchhpodHRwOi8vZ3Auc3ltY2IuY29tL2dw +LmNybDBvBgNVHSAEaDBmMGQGBmeBDAECATBaMCoGCCsGAQUFBwIBFh5odHRwczov +L3d3dy5yYXBpZHNzbC5jb20vbGVnYWwwLAYIKwYBBQUHAgIwIAweaHR0cHM6Ly93 +d3cucmFwaWRzc2wuY29tL2xlZ2FsMB8GA1UdIwQYMBaAFJfCJ1CewsnsDIgyyHyt +4qYBT9pvMA4GA1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYB +BQUHAwIwVwYIKwYBBQUHAQEESzBJMB8GCCsGAQUFBzABhhNodHRwOi8vZ3Auc3lt +Y2QuY29tMCYGCCsGAQUFBzAChhpodHRwOi8vZ3Auc3ltY2IuY29tL2dwLmNydDCC +AQMGCisGAQQB1nkCBAIEgfQEgfEA7wB2AN3rHSt6DU+mIIuBrYFocH4ujp0B1VyI +jT0RxM227L7MAAABXM9F02oAAAQDAEcwRQIgXZ3Ri+/TqCOBrN0h1bgZMeaMUGCq +Z+X3x32HnilBFqwCIQDYH4+Vkc5PKYGU8zwSDPa5flQlOrnQSoZ1MmGzJzzWeAB1 +AKS5CZC0GFgUh7sTosxncAo8NZgE+RvfuON3zQ7IDdwQAAABXM9F06EAAAQDAEYw +RAIgHPwBJvn48rGK4XlKWYDaoA2JYu0ncaQVyJTOsccc64MCIGjc42DqWCyhSUAA +TMFz6GVsglqfvv90bCk3yEoLyEQoMA0GCSqGSIb3DQEBCwUAA4IBAQCmXR1Pmo8J +uMqVJe6NXklEmoFwM1UJ4td8fLrwMEYwaknFSmaB6JkjOS5cZ3Segb9XBY4sT9Ya +AvF2kdOKA339UaSd+yimYYLt/KhO1nnYlNKrfGcjTnybuyczUDxsYR4N8FbTI9Yr +Z0baIlbVmSax9tjxwFMLWkgcd6tbwzCs+XPjvKMwQgxfuu14a5lI9EMsE1jjgxpa +jhedn5fNtKZdgaY9NVOsTkPSw79CzQfgSamSONyWaT949maFRe//sZPBOnE4jTBb +VJrNbfm+H+NFv7bF1Js2xPkgJuGkuKVc1H1qoJX4lf1vfpaZSSmzbe2O6GXHKF4y +MKlNUKhgoBO4 +-----END CERTIFICATE----- diff --git a/database/scraper.py b/database/scraper.py index 0fdf7831ba4a63bf697c6a73a2ae789a9c2da2d4..6475d82f51b09be68be5c9a9fe89efc903f174a0 100755 --- a/database/scraper.py +++ b/database/scraper.py @@ -83,34 +83,19 @@ directions = soup.findAll(attrs={'class': re.compile(r'.*\baccordion-item\b.*')}) routes = {} for direction in directions: direction_id = direction['data-directionid'] + route = [{'id': stop.find('a')['data-stopid'], 'name': stop['data-name'], - 'onDemand': re.search('stop-onDemand', str(stop['class'])) != None} + 'onDemand': re.search('stop-onDemand', str(stop['class'])) != None, + 'variant_type': re.search('variant-type-(in|out)', str(stop['class'])).groups()[0]\ + if re.search('variant-type-(in|out)', str(stop['class'])) is not None else None, + 'variant_first': re.search('first-variant', str(stop['class'])) is not None, + 'variant_last': re.search('last-variant', str(stop['class'])) is not None} for stop in direction.findAll(attrs={'class': re.compile(r'.*\bstop-itm\b.*')})] routes[direction_id] = route return routes def __get_stop_times(self, stop_id, line_id, direction_id): - """ - get timetable - """ - - """ todo get time to next stop: - <div class="route-timeline"> - <ul> - <li…> - <span class="stop-title">{current node_name} (n/ż)?</span> --> if not present, return None - … - </li> - <li…> - … - <span class="time">{time:INT}'</span> - </li> - </ul> - </div> - - """ - index = self.__post('https://www.ztm.poznan.pl/goeuropa-api/stop-info/{}/{}'. format(stop_id, line_id), {'directionId': direction_id}) soup = BeautifulSoup(index.text, 'html.parser') @@ -171,6 +156,25 @@ self.session = requests.session() return self.session.post(url, data=data, verify='bundle.pem') + # todo take into account parent (and for variant stops it needs synced departure times) + @staticmethod + def __calculate_time_to_next_stop(times, last_time_of_arrival): + times.sort() + earliest_departure = times[0] + if last_time_of_arrival == "": + return None, earliest_departure + + hour = int(earliest_departure[:2]) + minute = int(earliest_departure[3:]) + minute = minute + (60 * hour) + + last_hour = int(last_time_of_arrival[:2]) + last_minute = int(last_time_of_arrival[3:]) + last_minute = last_minute + (60 * last_hour) + + time_to_next_stop = minute - last_minute + return time_to_next_stop, earliest_departure + def download(self): """ main function @@ -196,9 +200,10 @@ cursor.execute('create table stops(id TEXT PRIMARY KEY, symbol TEXT \ references node(symbol), number TEXT, lat REAL, lon REAL, \ headsigns TEXT)') cursor.execute('create table lines(id TEXT PRIMARY KEY, number TEXT)') - cursor.execute('create table timetables(id TEXT PRIMARY KEY, stop_id TEXT references \ - stop(id), line_id TEXT references line(id), headsign TEXT, \ - numberInRoute INTEGER)') + cursor.execute('create table timetables(id TEXT PRIMARY KEY, stop_id TEXT \ + references stop(id), line_id TEXT references line(id), \ + headsign TEXT, parent TEXT references id, \ + parent_variant TEXT references id)') cursor.execute('create table departures(id INTEGER PRIMARY KEY, \ timetable_id TEXT references timetable(id), \ hour INTEGER, minute INTEGER, mode TEXT, \ @@ -227,18 +232,56 @@ route = self.__get_route(line_id) route_i = 1 for direction, stops in route.items(): stop_i = 1 - for stop in stops: + parent_stop = None + parent_stop_variant = None + for stop in stops[:-1]: if self.verbose: print("stop {} in route {} in line {}".format(stop_i, route_i, line_i)) timetables = self.__get_stop_times(stop['id'], line_id, direction) - cursor.execute('insert into timetables values(?, ?, ?, ?, ?)', - (timetable_id, stop['id'], line_id, stops[-1]['name'], stop_i)) + + if stop_i == 1 and stop['variant_type'] is None: + if self.verbose: + print('stop1 & main') + parent = None + parent_variant = None + parent_stop = stop['id'] + elif stop['variant_type'] == 'in' and stop['variant_first']: + if self.verbose: + print('in & first') + parent = None + parent_variant = None + parent_stop_variant = stop['id'] + elif stop_i > 1 and stop['variant_type'] is None: + if self.verbose: + print('stop>1 & main') + parent = parent_stop + parent_variant = parent_stop_variant + parent_stop = stop['id'] + parent_stop_variant = None + elif stop['variant_type'] is not None and not stop['variant_first']: + if self.verbose: + print('variant & not first') + parent = None + parent_variant = parent_stop_variant + parent_stop_variant = stop['id'] + elif stop['variant_type'] == 'out' and stop['variant_first']: + if self.verbose: + print('out & first') + parent = None + parent_variant = parent_stop + parent_stop_variant = stop['id'] + if stop['variant_type'] == 'out' and stop['variant_last']: + parent_stop_variant = None + + cursor.execute('insert into timetables values(?, ?, ?, ?, ?, ?)', + (timetable_id, stop['id'], line_id, stops[-1]['name'], parent, parent_variant)) for mode, times in timetables.items(): cursor.executemany('insert into departures values(null, ?, ?, ?, ?, ?, \ ?)', [(timetable_id, hour, minute, mode, lowfloor, desc) for hour, minute, desc, lowfloor in times]) stop_i += 1 timetable_id += 1 + route_i += 1 line_i += 1 except KeyboardInterrupt: