v1

2021-07-02 02:10:42 +02:00 · 2021-07-02 02:10:42 +02:00 · 5d65f9cc73
commit 5d65f9cc73
3 changed files with 119 additions and 0 deletions
--- a/12
+++ b/12
@ -0,0 +1,12 @@
 # Runtime image for the feed server (server.py).
 FROM python:3.8
 # Only third-party packages are installed here. `datetime` is part of the
 # Python standard library; the PyPI project of that name is Zope's unrelated
 # DateTime package, so it is intentionally not installed.
 RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir lxml feedgenerator requests
 # WORKDIR creates /app when it does not exist, so no separate mkdir is needed.
 WORKDIR /app
 # Unbuffered stdout/stderr so log lines show up immediately in the container log.
 ENV PYTHONUNBUFFERED=1
 # NOTE: server.py is not copied into the image; docker-compose.yml bind-mounts
 # it at /app/server.py.
 CMD ["python", "server.py"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,15 @@
 # Compose definition (file format version 2) for the feed server container.
 version: "2"
 # NOTE(review): this network is declared but no service below joins it —
 # confirm whether it is still needed.
 networks:
  gitea:
    external: false
 services:
  server:
    # The build context is the directory containing this file.
    build: .
    restart: always
    mem_limit: 512m
    volumes:
      # server.py is bind-mounted into the container rather than baked into the image.
      - ./server.py:/app/server.py
    ports:
      # Published on the host loopback interface only: host port 8090 -> container port 3000.
      - "127.0.0.1:8090:3000"
--- a/server.py
+++ b/server.py
@ -0,0 +1,92 @@
 #!/usr/bin/python
 # coding: utf-8
 from http.server import BaseHTTPRequestHandler,HTTPServer
 from lxml import etree
 import feedgenerator
 import requests
 import datetime
 import os
 def _parse_pubdate(time_texts, day_texts, now=None):
    """Combine a row's 'HH:MM' text and day label into a datetime.

    Args:
        time_texts: Text nodes holding the time of day; only the first is used.
        day_texts: Text nodes holding the day label; only the first is used.
        now: Reference datetime, defaults to ``datetime.datetime.today()``.

    Returns:
        A naive ``datetime.datetime``, or ``None`` when no time is present or
        it does not match ``%H:%M``.
    """
    if not time_texts:
        return None
    try:
        clock = datetime.datetime.strptime(time_texts[0].strip(), '%H:%M')
    except ValueError:
        # Unexpected format: leave the item undated instead of failing the feed.
        return None
    if now is None:
        now = datetime.datetime.today()
    midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
    pubdate = midnight + datetime.timedelta(hours=clock.hour, minutes=clock.minute)
    # NOTE(review): every label that does not contain "Heute" is treated as
    # "yesterday"; older rows may carry a different label — confirm.
    if day_texts and 'Heute' not in day_texts[0]:
        # timedelta arithmetic handles month/year boundaries, unlike
        # replace(day=day - 1), and datetimes are immutable so the result
        # has to be re-assigned.
        pubdate -= datetime.timedelta(days=1)
    return pubdate


 def fetchBoerse():
    """Scrape the forum thread list and return it as an RSS 2.0 document.

    The forum page is requested with browser-like headers and a session
    cookie, each row of the ``threadbits_forum_29`` table becomes one feed
    item, and the serialized feed is returned.

    The session cookie can be supplied through the ``BOERSE_COOKIE``
    environment variable; when it is unset the built-in value is used.

    Returns:
        bytes: The UTF-8 encoded RSS feed.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including the 30 second timeout).
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36',
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'en-US,en;q=0.5',
        # SECURITY NOTE(review): the fallback below embeds session credentials
        # in source control. Prefer setting BOERSE_COOKIE and rotating this
        # session; the literal is kept only for backward compatibility.
        'cookie': os.environ.get(
            'BOERSE_COOKIE',
            'bbsessionhash=5189a2ed214f57e9bbe77ae72ee2f668; bblastvisit=1625125458; bblastactivity=0; bbuserid=7485156; bbpassword=8574e2c835d63e85d75aac1ea733c213',
        ),
    }
    # A timeout keeps a stalled upstream from blocking the server forever.
    response = requests.get(
        'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele/',
        headers=headers,
        timeout=30,
    )
    tree = etree.HTML(response.text)
    # Guard against a parse that yields no document at all.
    items = [] if tree is None else tree.xpath('//*[@id="threadbits_forum_29"]/tr')

    feed = feedgenerator.Rss201rev2Feed(title="Foo",
            link="https://foo/bar",
            description="Foo",
            language="fr")

    for row in items:
        ids = row.xpath('td[2]/@id')
        post_id = ids[0] if ids else 'empty'

        # NOTE(review): the href is appended verbatim to the forum URL, which
        # assumes it starts with '/' — confirm against the live markup.
        links = row.xpath('td[2]//a/@href')
        link = ('https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele' + links[0]) if links else 'empty'

        descriptions = row.xpath('td[2]/@title')
        description = descriptions[0] if descriptions else 'empty'

        titles = row.xpath('td[2]//a/text()')
        title = titles[0] if titles else 'empty'

        # Computed per row, so a row without a time is left undated rather
        # than inheriting the previous row's date (or hitting an unbound name).
        pubdate = _parse_pubdate(
            row.xpath('td[3]//span[1]/text()'),
            row.xpath('td[3]/div/text()'),
        )

        feed.add_item(
            title=title,
            link=link,
            description=description,
            unique_id=post_id,
            pubdate=pubdate
        )
    return feed.writeString('utf-8').encode('utf-8')
 class Handler(BaseHTTPRequestHandler):
        """HTTP handler that serves the scraped feed at ``/boerse/audiobooks``.

        Every other path is answered with a small 404 HTML page.
        """

        def do_GET(self):
            """Answer a GET request with the RSS feed, a 404 page, or a 502 error."""
            if self.path != '/boerse/audiobooks':
                self._reply(404, 'text/html; charset=utf-8', b"<h2>nothing to see here</h2>")
                return
            # Build the feed before any header is written, so that a failure
            # can still be reported with a proper status code instead of
            # leaving a half-sent 200 response.
            try:
                body = fetchBoerse()
            except Exception as err:  # request boundary: log and report
                self.log_error("feed generation failed: %r", err)
                self._reply(502, 'text/plain; charset=utf-8', b"feed generation failed")
                return
            self._reply(200, 'application/rss+xml; charset=utf-8', body)

        def _reply(self, status, content_type, body):
            """Send a complete response: status line, headers and ``body`` (bytes)."""
            self.send_response(status)
            self.send_header('Content-type', content_type)
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
 # Bind to every interface on port 3000 and handle requests until the
 # process is stopped.
 listen_address = ('0.0.0.0', 3000)
 server = HTTPServer(listen_address, Handler)
 server.serve_forever()