From 5d65f9cc731d9af04e8a4c31b5f3f5ac623d0962 Mon Sep 17 00:00:00 2001
From: Aeris
Date: Fri, 2 Jul 2021 02:10:42 +0200
Subject: [PATCH] v1

---
 Dockerfile         |  12 ++++
 docker-compose.yml |  18 ++++++
 server.py          | 123 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 153 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100644 server.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c824333
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.8
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir lxml feedgenerator requests
+
+RUN mkdir /app
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1
+
+CMD ["python", "server.py"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2b80658
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,18 @@
+version: "2"
+
+networks:
+  gitea:
+    external: false
+
+services:
+  server:
+    build: .
+    restart: always
+    mem_limit: 512m
+    environment:
+      # Forwarded from the host when set; server.py falls back to its default.
+      - BOERSE_COOKIE
+    volumes:
+      - ./server.py:/app/server.py
+    ports:
+      - "127.0.0.1:8090:3000"
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..c29e532
--- /dev/null
+++ b/server.py
@@ -0,0 +1,123 @@
+#!/usr/bin/python
+# coding: utf-8
+"""Serve the boerse.im audiobook forum listing as an RSS feed over HTTP."""
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from lxml import etree
+import feedgenerator
+import requests
+import datetime
+import os
+
+FORUM_URL = 'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele/'
+# Prefix for thread hrefs; no trailing slash, so generated links stay as before.
+LINK_PREFIX = 'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele'
+ROWS_XPATH = '//*[@id="threadbits_forum_29"]/tr'
+FEED_PATH = '/boerse/audiobooks'
+USER_AGENT = ('Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 '
+              '(KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36')
+# NOTE(review): this session cookie is committed to the repository. Rotate it
+# and pass the real value through the BOERSE_COOKIE environment variable.
+DEFAULT_COOKIE = ('bbsessionhash=5189a2ed214f57e9bbe77ae72ee2f668; '
+                  'bblastvisit=1625125458; bblastactivity=0; bbuserid=7485156; '
+                  'bbpassword=8574e2c835d63e85d75aac1ea733c213')
+
+
+def _parse_pubdate(time_texts, day_texts, today):
+    """Build a publication datetime from the scraped time and day label.
+
+    time_texts -- text nodes whose first entry should be an 'HH:MM' string
+    day_texts  -- text nodes whose first entry contains "Heute" for today
+    today      -- midnight of the current day
+
+    Returns None when no parsable time is available.
+    """
+    if not time_texts:
+        return None
+    try:
+        clock = datetime.datetime.strptime(time_texts[0].strip(), '%H:%M')
+    except ValueError:
+        return None
+    pubdate = today + datetime.timedelta(hours=clock.hour, minutes=clock.minute)
+    # Rows not labelled "Heute" are dated to the previous day. timedelta is
+    # used because datetime.replace() returns a new object and cannot roll
+    # back across a month boundary.
+    if day_texts and 'Heute' not in day_texts[0]:
+        pubdate -= datetime.timedelta(days=1)
+    return pubdate
+
+
+def fetchBoerse():
+    """Scrape the forum thread table and return it as UTF-8 encoded RSS 2.0.
+
+    Raises requests.RequestException when the page cannot be fetched.
+    """
+    headers = {
+        'user-agent': USER_AGENT,
+        'accept-encoding': 'gzip, deflate',
+        'accept-language': 'en-US,en;q=0.5',
+        'cookie': os.environ.get('BOERSE_COOKIE', DEFAULT_COOKIE),
+    }
+    # HTTPServer handles one request at a time, so never wait forever.
+    response = requests.get(FORUM_URL, headers=headers, timeout=30)
+    response.raise_for_status()
+
+    tree = etree.HTML(response.text)
+    rows = [] if tree is None else tree.xpath(ROWS_XPATH)
+
+    feed = feedgenerator.Rss201rev2Feed(title="Foo",
+                                        link="https://foo/bar",
+                                        description="Foo",
+                                        language="fr")
+    today = datetime.datetime.today().replace(hour=0, minute=0, second=0,
+                                              microsecond=0)
+
+    for row in rows:
+        ids = row.xpath('td[2]/@id')
+        links = row.xpath('td[2]//a/@href')
+        descriptions = row.xpath('td[2]/@title')
+        titles = row.xpath('td[2]//a/text()')
+        feed.add_item(
+            title=titles[0] if titles else 'empty',
+            link=(LINK_PREFIX + links[0]) if links else 'empty',
+            description=descriptions[0] if descriptions else 'empty',
+            unique_id=ids[0] if ids else 'empty',
+            pubdate=_parse_pubdate(row.xpath('td[3]//span[1]/text()'),
+                                   row.xpath('td[3]/div/text()'),
+                                   today),
+        )
+
+    return feed.writeString('utf-8').encode('utf-8')
+
+
+class Handler(BaseHTTPRequestHandler):
+    """Serve the RSS feed on FEED_PATH and a 404 page for any other path."""
+
+    def _send(self, status, content_type, body):
+        """Write a complete response with the given status, type and bytes."""
+        self.send_response(status)
+        self.send_header('Content-type', content_type)
+        self.send_header('Content-Length', str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):
+        """Answer FEED_PATH with the feed, everything else with a 404."""
+        if self.path != FEED_PATH:
+            # NOTE(review): confirm the intended markup for this page.
+            self._send(404, 'text/html; charset=utf8',
+                       b"<h1>nothing to see here</h1>")
+            return
+        # Build the feed before sending any header so that a scraping
+        # failure becomes a 502 instead of an empty 200 response.
+        try:
+            body = fetchBoerse()
+        except Exception as err:
+            self.log_error("feed generation failed: %r", err)
+            self.send_error(502, "Could not build the feed")
+            return
+        self._send(200, 'application/rss+xml; charset=utf8', body)
+
+
+if __name__ == '__main__':
+    server = HTTPServer(('0.0.0.0', 3000), Handler)
+    server.serve_forever()