Bimba.git

commit 0346c44670b3c8267f99028d9e91995d5442df73

Author: Adam Pioterek <adam.pioterek@protonmail.ch>

new formula to decide valid timetables & small post, script asks for timetables

 .gitignore | 2 +-
 converter/README.md | 5 ++++-
 converter/local/converter.py | 17 +++++++++++++++--
 converter/local/uploader.py | 15 ++++++++-------
 converter/server/{gtfs.php => index.php} | 0
 converter/server/upload.php | 20 ++++++++++++++++----
 research/datasources.md | 1 +


diff --git a/.gitignore b/.gitignore
index c3608d1c1f5c3319817ba5555ee66c8051e5787b..ed8d2dc9b4c0c3278e42a3643b770da815b0193c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,4 +51,4 @@ database/timetable*
 converter/local/*.gz
 converter/local/metadata.yml
 converter/local/__pycache__
-converter/local/sign_key.py
+converter/local/config.py




diff --git a/converter/README.md b/converter/README.md
index e9365e10d38549ca710da2112a38f86835a9af7e..490b2e69c5a73f1e2c28494c7356e028f91169db 100644
--- a/converter/README.md
+++ b/converter/README.md
@@ -1,7 +1,7 @@
 ## Key file
 
 In order to upload timetables automatically, private key needs to be provided to `uploader.py`.
-`uploader.py` imports `sign_key.py` which needs to be as follows:
+`uploader.py` imports `config.py` which needs to be as follows:
 
 ```
 import nacl.signing
@@ -9,6 +9,9 @@
 key = nacl.signing.SigningKey(
         b'<here goes hexdigest of private key (64 hexadecimal digits)>',
         encoder=nacl.encoding.HexEncoder)
+
+storage = '<url pointing to where timetables lie with {} placeholder for id>'
+receiver = '<url to server script which receives commands to store timetables>'
 ```
 
 ## License




diff --git a/converter/local/converter.py b/converter/local/converter.py
old mode 100755
new mode 100644
index 4bd61663cdbe4139913ca322162af23accbe3640..8c817c00e9c0a965d79f605e37d95f2f8d3a94fc
--- a/converter/local/converter.py
+++ b/converter/local/converter.py
@@ -14,6 +14,7 @@ from pathlib import Path
 import hashlib
 import gzip
 import shutil
+import dateutil.parser
 
 import uploader
 
@@ -76,8 +77,18 @@         today = date.today().strftime('%Y%m%d')
         start, end = name.split('_')
         return today < start
 
+    def __validity_length(self, name1, name2):
+        x = dateutil.parser.parse(name1)
+        y = dateutil.parser.parse(name2)
+        return (y - x).days
+
+    def __sort_key(self, name):
+        s, e = name.split('_')
+        return s + "{0:03}".format(100 - self.__validity_length(s, e))
+
     def __clean_overlapping(self, names):
-        names.sort()
+        names.sort(key=self.__sort_key)
+        print(names)
         if len(names) == 1:
             return names
         return_names = []
@@ -85,8 +96,9 @@         i = 1
         for name in names[1:]:
             this_start, this_end = name.split('_')
             prev_start, prev_end = names[i-1].split('_')
-            if this_start <= prev_start or this_end > prev_end:
+            if not (this_start < prev_end or this_start == prev_start):
                 return_names.append(names[i-1])
+
             i = i + 1
         return_names.append(names[-1])
         return return_names
@@ -111,6 +123,7 @@     def __compress(self, checksum):
         with open('timetable.db', 'rb') as f_in:
             with gzip.open('{}.db.gz'.format(checksum), 'wb') as f_out:
                 shutil.copyfileobj(f_in, f_out)
+                os.chmod('{}.db.gz'.format(checksum), 0o644)
 
         Path('timetable.db').unlink()
 




diff --git a/converter/local/uploader.py b/converter/local/uploader.py
index 938424004aaac1508ba10f79fa5e3e51defc70c2..6be8579ae8bfa8ef9dce1504df47fe019908c5ec 100644
--- a/converter/local/uploader.py
+++ b/converter/local/uploader.py
@@ -4,7 +4,7 @@ import re
 import os
 import hashlib
 
-import sign_key
+import config
 
 
 def upload():
@@ -14,17 +14,18 @@     timetablesIds = [filename.split('.')[0] for filename in os.listdir('.')
                      if re.match('^.*\\.db\\.gz$', filename)]
     timetables = {}
     for tId in timetablesIds:
-        with open(f'{tId}.db.gz', 'rb') as f:
-            timetables[tId] = {'t': f.read(), 'sha': ''}
-            timetables[tId]['sha'] = hashlib.sha256(timetables[tId]['t']).\
-                hexdigest()
+        with open('{}.db.gz'.format(tId), 'rb') as f:
+            timetables[tId] = {'t': '', 'sha': ''}
+            t = f.read()
+            timetables[tId]['t'] = config.storage.format(tId)
+            timetables[tId]['sha'] = hashlib.sha256(t).hexdigest()
 
-    signature = sign_key.key.sign(bytes(metadata, 'utf-8'))
+    signature = config.key.sign(bytes(metadata, 'utf-8'))
 
     data = msgpack.packb({'metadata': metadata, 'timetables': timetables,
                           'signature': signature.signature}, use_bin_type=True)
 
     session = requests.Session()
-    response = session.post('http://localhost:8000/upload.php', data)
+    response = session.post(config.receiver, data)
     print(response)
     print(response.text)




diff --git a/converter/server/gtfs.php b/converter/server/gtfs.php
deleted file mode 100644
index 58d0e7a2287ab6b41a8d0a100216b76f80d77366..0000000000000000000000000000000000000000
--- a/converter/server/gtfs.php
+++ /dev/null
@@ -1,35 +0,0 @@
-<?php
-
-require 'vendor/mustangostang/spyc/Spyc.php';
-
-$metadata = Spyc::YAMLLoad('metadata.yml');
-$today = date('Ymd');
-$current = '';
-$sizeU = '';
-$sizeC = '';
-
-foreach($metadata as $row) {
-    $start = $row['start'];
-    $end = $row['end'];
-    if ($start <= $today and $today <= $end) {
-        $current = $row['id'];
-        $sizeU = $row['size_uncompressed'];
-        $sizeC = $row['size_compressed'];
-        break;
-    }
-}
-unset($row);
-
-$etag = $_SERVER['HTTP_IF_NONE_MATCH'];
-
-if ($etag == $current) {
-    http_response_code(304);
-} else {
-    header("ETag: $current");
-    header('Content-Type: application/octet-stream');
-    header('Content-Disposition: attachment; filename="timetable.db.gz"');
-    header('Content-Length: ' . filesize("$current.db.gz"));
-    header('X-Uncompressed-Content-Length: ' . $sizeU);
-    readfile("$current.db.gz");
-}
-?>




diff --git a/converter/server/index.php b/converter/server/index.php
new file mode 100644
index 0000000000000000000000000000000000000000..58d0e7a2287ab6b41a8d0a100216b76f80d77366
--- /dev/null
+++ b/converter/server/index.php
@@ -0,0 +1,35 @@
+<?php
+
+require 'vendor/mustangostang/spyc/Spyc.php';
+
+$metadata = Spyc::YAMLLoad('metadata.yml');
+$today = date('Ymd');
+$current = '';
+$sizeU = '';
+$sizeC = '';
+
+foreach($metadata as $row) {
+    $start = $row['start'];
+    $end = $row['end'];
+    if ($start <= $today and $today <= $end) {
+        $current = $row['id'];
+        $sizeU = $row['size_uncompressed'];
+        $sizeC = $row['size_compressed'];
+        break;
+    }
+}
+unset($row);
+
+$etag = $_SERVER['HTTP_IF_NONE_MATCH'];
+
+if ($etag == $current) {
+    http_response_code(304);
+} else {
+    header("ETag: $current");
+    header('Content-Type: application/octet-stream');
+    header('Content-Disposition: attachment; filename="timetable.db.gz"');
+    header('Content-Length: ' . filesize("$current.db.gz"));
+    header('X-Uncompressed-Content-Length: ' . $sizeU);
+    readfile("$current.db.gz");
+}
+?>




diff --git a/converter/server/upload.php b/converter/server/upload.php
index c07eea8bcf039adb6be546f2a91d2c9e651fc88f..d95574fc8c3975c3a70054c8bab8f5cde9bee938 100644
--- a/converter/server/upload.php
+++ b/converter/server/upload.php
@@ -1,5 +1,7 @@
 <?php
 
+set_time_limit(0);
+
 require_once 'vendor/paragonie/sodium_compat/autoload.php';
 require_once 'vendor/mustangostang/spyc/Spyc.php';
 require_once 'vendor/autoload.php';
@@ -34,8 +36,21 @@ $timetables = $post['timetables'];
 foreach ($timetables as $id => $timetable) {
     $t = $timetable['t'];
     $sha = $timetable['sha'];
-    $checksum = hash('sha256', $t);
+
+    // todo if $id in $oldMetadata -> skip
+
+    $fp = fopen(dirname(__FILE__) . "/$id.db.gz", 'wb');
+    $ch = curl_init($t);
+    curl_setopt($ch, CURLOPT_TIMEOUT, 50);
+    curl_setopt($ch, CURLOPT_FILE, $fp);
+    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+    curl_exec($ch);
+    curl_close($ch);
+    fclose($fp);
+
+    $checksum = hash_file('sha256', "$id.db.gz");
     if ($checksum != $sha) {
+        unlink("$id.db.gz");
         http_response_code(400);
         die("checksums invalid for $id, expected $sha got $checksum");
     }
@@ -52,9 +67,6 @@ }
 $toDelete = array_diff($oldIDs, $newIDs);
 foreach ($toDelete as $it) {
     unlink("$it.db.gz");
-}
-foreach ($timetables as $id => $timetable) {
-    file_put_contents("$id.db.gz", $timetable);
 }
 
 file_put_contents('metadata.yml', $post['metadata']);




diff --git a/research/datasources.md b/research/datasources.md
index 4478128f46d30bd0dc7877eeaaf4f2fd11d824a8..2aa037fcfd2a6040958f950bb02c64685d6170c2 100644
--- a/research/datasources.md
+++ b/research/datasources.md
@@ -7,3 +7,4 @@ * real-time timetable: http://egov.psnc.pl/node/29#przystanki
 * wireless: http://egov.psnc.pl/node/29#wireless_poznan
 * tourism: http://egov.psnc.pl/node/29#turystyka
 * P&R: https://www.peka.poznan.pl/SOP/parkings/getAvailableSpaces.jspb
+* accidents: https://mympk.mpk.poznan.pl/web/menu