#!/usr/bin/python
# coding: utf-8
"""Serve an RSS feed scraped from the boerse.im audiobook forum listing.

``GET /boerse/audiobooks`` on port 3000 returns the generated feed; every
other path is answered with a 404.
"""
import datetime
import os
from http.server import BaseHTTPRequestHandler, HTTPServer

import feedgenerator
import requests
from lxml import etree

# Forum page that is scraped for threads.
FORUM_URL = 'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele/'

# Prefix prepended to each thread href found in the table (no trailing slash,
# exactly as the links were built originally).
THREAD_BASE_URL = 'https://boerse.im/boerse/audioboerse/hoerbuecher-und-hoerspiele'

# Seconds to wait for the forum before giving up, so that a stalled upstream
# cannot block the single-threaded server forever.
REQUEST_TIMEOUT = 30

# NOTE(review): the cookie below embeds a session hash, a user id and a
# password hash directly in the source. Consider loading it from the
# environment or a config file and rotating the credentials.
REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36',
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'en-US,en;q=0.5',
    'cookie': 'bbsessionhash=5189a2ed214f57e9bbe77ae72ee2f668; bblastvisit=1625125458; bblastactivity=0; bbuserid=7485156; bbpassword=8574e2c835d63e85d75aac1ea733c213',
}


def _first(values, default='empty'):
    """Return the first XPath match, or *default* when nothing matched."""
    return values[0] if values else default


def _parse_pubdate(time_texts, day_texts, today=None):
    """Build the publication datetime of one table row.

    Args:
        time_texts: text nodes of the time cell; the first one is expected
            to look like ``HH:MM``.
        day_texts: text nodes of the day cell; when the first one contains
            ``"Heute"`` the post is dated today, otherwise yesterday.
        today: date to treat as "today"; defaults to the current local date.

    Returns:
        A naive ``datetime``, or ``None`` when the row carries no parseable
        time.
    """
    if not time_texts:
        return None
    try:
        clock = datetime.datetime.strptime(time_texts[0].strip(), '%H:%M').time()
    except ValueError:
        # Unexpected cell content: publish the item without a date rather
        # than failing the whole feed.
        return None
    if today is None:
        today = datetime.date.today()
    stamp = datetime.datetime.combine(today, clock)
    if day_texts and 'Heute' not in day_texts[0]:
        # datetime objects are immutable, and day - 1 is invalid on the first
        # of a month, so shift with a timedelta instead of replace().
        stamp -= datetime.timedelta(days=1)
    return stamp


def fetchBoerse():
    """Scrape the forum listing and return it as an RSS 2.0 document.

    Returns:
        The feed serialized as UTF-8 encoded ``bytes``.

    Raises:
        requests.RequestException: the page could not be fetched (network
            error, timeout, or an HTTP error status).
    """
    response = requests.get(FORUM_URL, headers=REQUEST_HEADERS,
                            timeout=REQUEST_TIMEOUT)
    # Do not turn an upstream error page into a silently empty feed.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    # Guard against a parser result of None (nothing to query in that case).
    rows = [] if tree is None else tree.xpath('//*[@id="threadbits_forum_29"]/tr')

    feed = feedgenerator.Rss201rev2Feed(title="Foo", link="https://foo/bar",
                                        description="Foo", language="fr")

    for row in rows:
        hrefs = row.xpath('td[2]//a/@href')
        feed.add_item(
            title=_first(row.xpath('td[2]//a/text()')),
            link=THREAD_BASE_URL + hrefs[0] if hrefs else 'empty',
            description=_first(row.xpath('td[2]/@title')),
            unique_id=_first(row.xpath('td[2]/@id')),
            pubdate=_parse_pubdate(
                row.xpath('td[3]//span[1]/text()'),
                row.xpath('td[3]/div/text()'),
            ),
        )

    return bytes(feed.writeString('utf-8'), 'utf-8')


class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing the scraped feed at ``/boerse/audiobooks``."""

    def do_GET(self):
        """Answer the feed path with RSS and everything else with a 404."""
        if self.path == '/boerse/audiobooks':
            # Fetch before sending any header so that an upstream failure is
            # reported as an error instead of a 200 with an empty body.
            try:
                body = fetchBoerse()
            except requests.RequestException:
                self.send_error(502, 'Upstream fetch failed')
                return
            self.send_response(200)
            self.send_header('Content-type', 'application/rss+xml; charset=utf8')
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_response(404)
            self.send_header('Content-type', 'text/html; charset=utf8')
            self.end_headers()
            self.wfile.write(b"\n\nnothing to see here\n\n")


if __name__ == '__main__':
    # Only start serving when executed as a script, not when imported.
    server = HTTPServer(('0.0.0.0', 3000), Handler)
    server.serve_forever()