v1

2021-07-02 02:10:42 +02:00 · 2021-07-02 02:10:42 +02:00 · 5d65f9cc73
commit 5d65f9cc73
3 changed files with 119 additions and 0 deletions
--- a/12
+++ b/12
@ -0,0 +1,12 @@
+# Runtime image for the feed server (server.py).
+FROM python:3.8
+
+# Third-party libraries imported by server.py. The PyPI "datetime" package is
+# Zope's DateTime, not the standard-library module that server.py imports, so
+# it is intentionally not installed.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir lxml feedgenerator requests
+
+RUN mkdir /app
+
+WORKDIR /app
+
+# Disable Python output buffering so log lines reach the container logs at once.
+ENV PYTHONUNBUFFERED=1
+
+# server.py is not copied into the image; docker-compose.yml mounts it at /app/server.py.
+CMD ["python", "server.py"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,15 @@
+# Compose definition for the feed server container.
+version: "2"
+
+# NOTE(review): the "gitea" network is declared here, but no service in this
+# file references it - confirm whether it is still needed.
+networks:
+  gitea:
+    external: false
+
+services:
+  server:
+    # Build context is this directory.
+    build: .
+    restart: always
+    mem_limit: 512m
+    volumes:
+      # server.py is mounted from the host into the container's /app directory.
+      - ./server.py:/app/server.py
+    ports:
+      # Published on the host loopback only: host port 8090 -> container port 3000.
+      - "127.0.0.1:8090:3000"
--- a/server.py
+++ b/server.py
@ -0,0 +1,92 @@
+#!/usr/bin/python
+# coding: utf-8
+from http.server import BaseHTTPRequestHandler,HTTPServer
+from lxml import etree
+import feedgenerator
+import requests
+import datetime
+import os
+
+
+def fetchBoerse():
+
+    # fetching the html page
+    headers = { 'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36',
+                'accept-encoding': 'gzip, deflate',
+                'accept-language': 'en-US,en;q=0.5',
+                'cookie': 'bbsessionhash=5189a2ed214f57e9bbe77ae72ee2f668; bblastvisit=1625125458; bblastactivity=0; bbuserid=7485156; bbpassword=8574e2c835d63e85d75aac1ea733c213'
+                }
+                
+    response = requests.get('https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele/', headers=headers)
+    doc = response.text
+
+    # getting the items 
+    tree = etree.HTML(doc)
+    items = tree.xpath('//*[@id="threadbits_forum_29"]/tr')
+
+    # creating a feed
+    feed = feedgenerator.Rss201rev2Feed(title="Foo",
+            link="https://foo/bar",
+            description="Foo",
+            language="fr")
+
+    # for each line in the table
+    for i in items:
+        # getting the identifier
+        ids = i.xpath('td[2]/@id')
+        post_id = 'empty' if len(ids) == 0 else ids[0]
+
+        # getting the link
+        links = i.xpath('td[2]//a/@href')
+        link = 'empty' if len(links) == 0 else 'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele' + links[0]
+
+        # getting the description
+        descriptions = i.xpath('td[2]/@title')
+        description = 'empty' if len(descriptions) == 0 else descriptions[0]
+
+        # getting the title
+        titles = i.xpath('td[2]//a/text()')
+        title = 'empty' if len(titles) == 0 else titles[0]
+
+        #time
+        dates = i.xpath('td[3]//span[1]/text()')
+        if len(dates) > 0:
+            n = datetime.datetime.strptime(dates[0], '%H:%M')
+            date = datetime.timedelta(hours=n.hour,minutes=n.minute) + datetime.datetime.today().replace(hour=0,minute=0,second=0,microsecond=0)
+
+        #date
+        whens = i.xpath('td[3]/div/text()')
+        when = 0
+        if len(whens) != 0:
+            when = 0 if (whens[0].find("Heute") > -1) else -1
+            date.replace(day=date.day+when)
+
+        feed.add_item(
+            title=title,
+            link=link,
+            description=description,
+            unique_id=post_id,
+            pubdate=date
+        )
+
+    return(bytes(feed.writeString('utf-8'),'utf-8'))
+    # f = open("out.xml", "a")
+    # f.write(feed.writeString('utf-8'))
+    # f.close()
+
+class Handler(BaseHTTPRequestHandler):
+    
+        def do_GET(self):
+            if self.path == '/boerse/audiobooks':
+                self.send_response(200)
+                self.send_header('Content-type','application/rss+xml; charset=utf8')
+                self.end_headers()
+                self.wfile.write(fetchBoerse())
+            else:
+                self.send_response(404)
+                self.send_header('Content-type','text/html; charset=utf8')
+                self.end_headers()
+                self.wfile.write(b"<h2>nothing to see here</h2>")
+
+server = HTTPServer(('0.0.0.0', 3000), Handler)
+server.serve_forever()