Author: Adam Pioterek <adam.pioterek@protonmail.ch>
New formula to decide valid timetables & small POST; server script asks for timetables
.gitignore | 2 +- converter/README.md | 5 ++++- converter/local/converter.py | 17 +++++++++++++++-- converter/local/uploader.py | 15 ++++++++------- | 0 converter/server/upload.php | 20 ++++++++++++++++---- research/datasources.md | 1 +
diff --git a/.gitignore b/.gitignore index c3608d1c1f5c3319817ba5555ee66c8051e5787b..ed8d2dc9b4c0c3278e42a3643b770da815b0193c 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,4 @@ database/timetable* converter/local/*.gz converter/local/metadata.yml converter/local/__pycache__ -converter/local/sign_key.py +converter/local/config.py diff --git a/converter/README.md b/converter/README.md index e9365e10d38549ca710da2112a38f86835a9af7e..490b2e69c5a73f1e2c28494c7356e028f91169db 100644 --- a/converter/README.md +++ b/converter/README.md @@ -1,7 +1,7 @@ ## Key file In order to upload timetables automatically, private key needs to be provided to `uploader.py`. -`uploader.py` imports `sign_key.py` which needs to be as follows: +`uploader.py` imports `config.py` which needs to be as follows: ``` import nacl.signing @@ -9,6 +9,9 @@ key = nacl.signing.SigningKey( b'<here goes hexdigest of private key (64 hexadecimal digits)>', encoder=nacl.encoding.HexEncoder) + +storage = '<url pointing to where timetables lie with {} placeholder for id>' +receiver = '<url to server script which receives commands to store timetables>' ``` ## License diff --git a/converter/local/converter.py b/converter/local/converter.py old mode 100755 new mode 100644 index 4bd61663cdbe4139913ca322162af23accbe3640..8c817c00e9c0a965d79f605e37d95f2f8d3a94fc --- a/converter/local/converter.py +++ b/converter/local/converter.py @@ -14,6 +14,7 @@ from pathlib import Path import hashlib import gzip import shutil +import dateutil.parser import uploader @@ -76,8 +77,18 @@ today = date.today().strftime('%Y%m%d') start, end = name.split('_') return today < start + def __validity_length(self, name1, name2): + x = dateutil.parser.parse(name1) + y = dateutil.parser.parse(name2) + return (y - x).days + + def __sort_key(self, name): + s, e = name.split('_') + return s + "{0:03}".format(100 - self.__validity_length(s, e)) + def __clean_overlapping(self, names): - names.sort() + names.sort(key=self.__sort_key) + 
print(names) if len(names) == 1: return names return_names = [] @@ -85,8 +96,9 @@ i = 1 for name in names[1:]: this_start, this_end = name.split('_') prev_start, prev_end = names[i-1].split('_') - if this_start <= prev_start or this_end > prev_end: + if not (this_start < prev_end or this_start == prev_start): return_names.append(names[i-1]) + i = i + 1 return_names.append(names[-1]) return return_names @@ -111,6 +123,7 @@ def __compress(self, checksum): with open('timetable.db', 'rb') as f_in: with gzip.open('{}.db.gz'.format(checksum), 'wb') as f_out: shutil.copyfileobj(f_in, f_out) + os.chmod('{}.db.gz'.format(checksum), 0o644) Path('timetable.db').unlink() diff --git a/converter/local/uploader.py b/converter/local/uploader.py index 938424004aaac1508ba10f79fa5e3e51defc70c2..6be8579ae8bfa8ef9dce1504df47fe019908c5ec 100644 --- a/converter/local/uploader.py +++ b/converter/local/uploader.py @@ -4,7 +4,7 @@ import re import os import hashlib -import sign_key +import config def upload(): @@ -14,17 +14,18 @@ timetablesIds = [filename.split('.')[0] for filename in os.listdir('.') if re.match('^.*\\.db\\.gz$', filename)] timetables = {} for tId in timetablesIds: - with open(f'{tId}.db.gz', 'rb') as f: - timetables[tId] = {'t': f.read(), 'sha': ''} - timetables[tId]['sha'] = hashlib.sha256(timetables[tId]['t']).\ - hexdigest() + with open('{}.db.gz'.format(tId), 'rb') as f: + timetables[tId] = {'t': '', 'sha': ''} + t = f.read() + timetables[tId]['t'] = config.storage.format(tId) + timetables[tId]['sha'] = hashlib.sha256(t).hexdigest() - signature = sign_key.key.sign(bytes(metadata, 'utf-8')) + signature = config.key.sign(bytes(metadata, 'utf-8')) data = msgpack.packb({'metadata': metadata, 'timetables': timetables, 'signature': signature.signature}, use_bin_type=True) session = requests.Session() - response = session.post('http://localhost:8000/upload.php', data) + response = session.post(config.receiver, data) print(response) print(response.text) diff --git 
a/converter/server/gtfs.php b/converter/server/gtfs.php deleted file mode 100644 index 58d0e7a2287ab6b41a8d0a100216b76f80d77366..0000000000000000000000000000000000000000 --- a/converter/server/gtfs.php +++ /dev/null @@ -1,35 +0,0 @@ -<?php - -require 'vendor/mustangostang/spyc/Spyc.php'; - -$metadata = Spyc::YAMLLoad('metadata.yml'); -$today = date('Ymd'); -$current = ''; -$sizeU = ''; -$sizeC = ''; - -foreach($metadata as $row) { - $start = $row['start']; - $end = $row['end']; - if ($start <= $today and $today <= $end) { - $current = $row['id']; - $sizeU = $row['size_uncompressed']; - $sizeC = $row['size_compressed']; - break; - } -} -unset($row); - -$etag = $_SERVER['HTTP_IF_NONE_MATCH']; - -if ($etag == $current) { - http_response_code(304); -} else { - header("ETag: $current"); - header('Content-Type: application/octet-stream'); - header('Content-Disposition: attachment; filename="timetable.db.gz"'); - header('Content-Length: ' . filesize("$current.db.gz")); - header('X-Uncompressed-Content-Length: ' . 
$sizeU); - readfile("$current.db.gz"); -} -?> diff --git a/converter/server/index.php b/converter/server/index.php new file mode 100644 index 0000000000000000000000000000000000000000..58d0e7a2287ab6b41a8d0a100216b76f80d77366 --- /dev/null +++ b/converter/server/index.php @@ -0,0 +1,35 @@ +<?php + +require 'vendor/mustangostang/spyc/Spyc.php'; + +$metadata = Spyc::YAMLLoad('metadata.yml'); +$today = date('Ymd'); +$current = ''; +$sizeU = ''; +$sizeC = ''; + +foreach($metadata as $row) { + $start = $row['start']; + $end = $row['end']; + if ($start <= $today and $today <= $end) { + $current = $row['id']; + $sizeU = $row['size_uncompressed']; + $sizeC = $row['size_compressed']; + break; + } +} +unset($row); + +$etag = $_SERVER['HTTP_IF_NONE_MATCH']; + +if ($etag == $current) { + http_response_code(304); +} else { + header("ETag: $current"); + header('Content-Type: application/octet-stream'); + header('Content-Disposition: attachment; filename="timetable.db.gz"'); + header('Content-Length: ' . filesize("$current.db.gz")); + header('X-Uncompressed-Content-Length: ' . $sizeU); + readfile("$current.db.gz"); +} +?> diff --git a/converter/server/upload.php b/converter/server/upload.php index c07eea8bcf039adb6be546f2a91d2c9e651fc88f..d95574fc8c3975c3a70054c8bab8f5cde9bee938 100644 --- a/converter/server/upload.php +++ b/converter/server/upload.php @@ -1,5 +1,7 @@ <?php +set_time_limit(0); + require_once 'vendor/paragonie/sodium_compat/autoload.php'; require_once 'vendor/mustangostang/spyc/Spyc.php'; require_once 'vendor/autoload.php'; @@ -34,8 +36,21 @@ $timetables = $post['timetables']; foreach ($timetables as $id => $timetable) { $t = $timetable['t']; $sha = $timetable['sha']; - $checksum = hash('sha256', $t); + + // todo if $id in $oldMetadata -> skip + + $fp = fopen(dirname(__FILE__) . 
"/$id.db.gz", 'wb'); + $ch = curl_init($t); + curl_setopt($ch, CURLOPT_TIMEOUT, 50); + curl_setopt($ch, CURLOPT_FILE, $fp); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_exec($ch); + curl_close($ch); + fclose($fp); + + $checksum = hash_file('sha256', "$id.db.gz"); if ($checksum != $sha) { + unlink("$id.db.gz"); http_response_code(400); die("checksums invalid for $id, expected $sha got $checksum"); } @@ -52,9 +67,6 @@ } $toDelete = array_diff($oldIDs, $newIDs); foreach ($toDelete as $it) { unlink("$it.db.gz"); -} -foreach ($timetables as $id => $timetable) { - file_put_contents("$id.db.gz", $timetable); } file_put_contents('metadata.yml', $post['metadata']); diff --git a/research/datasources.md b/research/datasources.md index 4478128f46d30bd0dc7877eeaaf4f2fd11d824a8..2aa037fcfd2a6040958f950bb02c64685d6170c2 100644 --- a/research/datasources.md +++ b/research/datasources.md @@ -7,3 +7,4 @@ * real-time timetable: http://egov.psnc.pl/node/29#przystanki * wireless: http://egov.psnc.pl/node/29#wireless_poznan * tourism: http://egov.psnc.pl/node/29#turystyka * P&R: https://www.peka.poznan.pl/SOP/parkings/getAvailableSpaces.jspb +* accidents: https://mympk.mpk.poznan.pl/web/menu