#!/usr/bin/python
"""Fetch articles from a login-protected RSS feed and store them in MySQL.

A Selenium-driven Chrome session logs in, the session cookies are reused
with ``requests`` to download every feed entry, and the selected part of
each new article is written to a feed table.
"""

import re
import sys
import time
from io import StringIO
from threading import Lock, Timer

import feedparser
import lxml.etree as etree
import mysql.connector
import requests as req
import selenium
from cssselect import HTMLTranslator, SelectorError
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

from spline import gcv, bspline
#import web

# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a config file that is not committed.
DB_HOST = "127.0.0.1"
DB_USER = "insecure"
DB_PASSWD = "insecure"
DB_DATABASE = "insecure"

ACCOUNT_USERNAME = "foooo@google.com"
ACCOUNT_PASSWORD = "foorg53"

_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*\Z")


def _checkIdentifier(name):
    """Return *name* if it is a plain SQL identifier, else raise ValueError.

    Table and column names cannot be passed as query parameters, so they
    are validated before being formatted into a statement.
    """
    if not _IDENTIFIER_RE.match(str(name)):
        raise ValueError("Invalid SQL identifier: {!r}".format(name))
    return name


def _cookieMapping(cookies):
    """Return *cookies* as a ``{name: value}`` dict.

    Accepts such a dict, a single cookie dict carrying ``name``/``value``
    keys, or a list of cookie dicts (the shape ``driver.get_cookies()``
    returns).
    """
    if isinstance(cookies, dict):
        if "name" in cookies and "value" in cookies:
            return {cookies["name"]: cookies["value"]}
        return cookies
    return {cookie["name"]: cookie["value"] for cookie in cookies}


def getDb():
    """Open and return a MySQL connection built from the DB_* constants."""
    return mysql.connector.connect(
        host=DB_HOST,
        user=DB_USER,
        passwd=DB_PASSWD,
        database=DB_DATABASE)


def tableExist(db, name):
    """Return True if table *name* exists in the connection's database."""
    cursor = db.cursor()
    try:
        cursor.execute(
            "SELECT COUNT(*) FROM information_schema.tables "
            "WHERE table_schema = DATABASE() AND table_name = %s",
            (name,))
        return cursor.fetchone()[0] > 0
    finally:
        cursor.close()


def createTable(db, name):
    """Create the article table *name*.

    The ``date`` column is included because insertFeed and queryFeed both
    read and write it.
    """
    cursor = db.cursor()
    try:
        cursor.execute("""
            CREATE TABLE {0}
            (id INT AUTO_INCREMENT PRIMARY KEY,
            title VARCHAR(255),
            `date` VARCHAR(255),
            author VARCHAR(255),
            content TEXT(65535))
            """.format(_checkIdentifier(name)))
    finally:
        cursor.close()


def createTableMa(db, name):
    """Create the feed-configuration table *name* if it does not exist."""
    cursor = db.cursor()
    try:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS {}
            (id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            url VARCHAR(255),
            pass VARCHAR(255),
            user VARCHAR(255),
            selector VARCHAR(255),
            selector_login_user VARCHAR(255),
            selector_login_pass VARCHAR(255),
            selector_login_verify VARCHAR(255),
            selector_login_url VARCHAR(255),
            selector_login_verify_url VARCHAR(255))
            """.format(_checkIdentifier(name)))
    finally:
        cursor.close()


def createTableIm(db, name):
    """Make sure article table *name* exists.

    Returns True when the table exists on return (already present or just
    created), False otherwise.
    """
    if tableExist(db, name):
        return True
    createTable(db, name)
    return tableExist(db, name)


def die():
    """Terminate the program via sys.exit()."""
    sys.exit()


def insertFeed(db, table, title, date, author, content):
    """Insert one article row into *table* and commit it.

    The values come from a remote feed, so they are bound as query
    parameters instead of being formatted into the statement.
    """
    cursor = db.cursor()
    try:
        cursor.execute(
            "INSERT INTO {0} (title, `date`, author, content) "
            "VALUES (%s, %s, %s, %s)".format(_checkIdentifier(table)),
            (title, date, author, content))
        db.commit()
    finally:
        cursor.close()


def queryFeed(db, table, title, date, author):
    """Return the id of the row matching *title* and *date*, or -1.

    *author* is accepted for call-site symmetry with insertFeed but is not
    part of the lookup.
    """
    cursor = db.cursor()
    try:
        # LIMIT 1 so the single fetchone() consumes the whole result set.
        cursor.execute(
            "SELECT id FROM {0} WHERE title = %s AND `date` = %s "
            "LIMIT 1".format(_checkIdentifier(table)),
            (title, date))
        row = cursor.fetchone()
    finally:
        cursor.close()
    if row is None:
        return -1
    return int(row[0])


def queryRss(url, rss):
    """Parse *url* with feedparser when *rss* is truthy; otherwise None."""
    if rss:
        return feedparser.parse(url)
    return None


def getContent(entryUrl, session_cookies, selector):
    """Download *entryUrl* and return the HTML of the selected elements.

    *selector* is a CSS selector; every matching element is serialised and
    the pieces are concatenated in document order.  A non-200 response
    prints an error and terminates the program.
    """
    r = req.get(entryUrl, cookies=_cookieMapping(session_cookies))
    print(r.status_code)
    if r.status_code != 200:
        print("Err fetching: " + entryUrl)
        die()
    xpath = HTMLTranslator().css_to_xpath(selector)
    tree = etree.parse(StringIO(r.text), etree.HTMLParser())
    # xpath() returns a list of elements, so each one is serialised.
    return "".join(
        etree.tostring(node, encoding="unicode", method="html",
                       with_tail=False)
        for node in tree.xpath(xpath))


def move_mouse(p1, p2, driver, time_=1000, n=400, click=False):
    """Move the mouse along a spline from *p1* to *p2*, optionally clicking.

    p1, p2 -- (x, y) tuples, e.g. (23., 23.)
    time_  -- total duration of the movement in milliseconds
    n      -- number of sampled spline points
    click  -- perform a click at the final pointer position
    """
    points = bspline(gcv(p1, p2, 12), degree=5, n=n)
    print(points)
    delay = time_ / (1000.0 * n) if n else 0
    for point in points:
        if point[0] < 0 or point[1] < 0:
            continue
        print(point[0], point[1])
        # NOTE(review): the points look like absolute coordinates but are
        # applied as relative offsets, as in the original -- confirm
        # against the spline module.
        # A fresh chain per step, so moves queued earlier can never be
        # replayed by a later perform().
        ActionChains(driver).move_by_offset(
            int(point[0]), int(point[1])).perform()
        time.sleep(delay)
    if click:
        ActionChains(driver).click().perform()


def get_loc(e):
    """Return the point one third of the way into element *e* as (x, y)."""
    return (e.location['x'] + (e.size['width'] / 3),
            e.location['y'] + (e.size['height'] / 3))


def getSession(user, password, url="https://accounts.ft.com/login"):
    """Log in through Chrome and return the session cookies.

    Returns a ``{name: value}`` dict, which is the shape ``requests``
    accepts for its ``cookies`` argument.
    """
    driver = selenium.webdriver.Chrome()
    try:
        driver.set_window_position(0, 0)
        driver.set_window_size(3840, 1920)
        driver.get(url)

        email = driver.find_element(By.ID, "enter-email")
        pos1 = get_loc(email)
        print(pos1)
        move_mouse((123.3, 334.2), pos1, driver, n=200)
        email.send_keys(user)
        time.sleep(2)

        email_submit = driver.find_element(By.ID, "enter-email-next")
        pos2 = get_loc(email_submit)
        move_mouse(pos1, pos2, driver, n=50)
        email_submit.submit()
        time.sleep(3)

        password_el = driver.find_element(By.ID, "enter-password")
        pos3 = get_loc(password_el)
        move_mouse(pos2, pos3, driver, n=50)
        password_el.send_keys(password)
        time.sleep(5)

        button = driver.find_element(
            By.CSS_SELECTOR,
            ".o-buttons--primary.o-buttons--big.main-button")
        pos4 = get_loc(button)
        move_mouse(pos3, pos4, driver, n=50, click=True)
        time.sleep(20)

        return _cookieMapping(driver.get_cookies())
    finally:
        driver.quit()


def verifySession(session, user, url="https://www.ft.com/myaccount"):
    """Return True if *url*, loaded with cookies *session*, shows *user*.

    *session* may be anything _cookieMapping accepts.
    """
    driver = selenium.webdriver.Chrome()
    try:
        # Load the page once before adding cookies so the browser is
        # already on the site they are being set for.
        driver.get(url)
        for name, value in _cookieMapping(session).items():
            driver.add_cookie({"name": name, "value": value})
        driver.get(url)
        elements = driver.find_elements(By.ID, "rightRailEmailAddress")
        return any(user in element.text for element in elements)
    finally:
        driver.quit()


def _storeNewEntries(db, table, cookies, rss_url, selector):
    """Insert every entry of *rss_url* that *table* does not hold yet."""
    parsed = queryRss(rss_url, True)
    for post in parsed.entries:
        title = post.get("title", "")
        # Fall back to the published date when the entry has no created
        # date, and to "" when the entry carries neither.
        date = post.get("created") or post.get("published", "")
        author = post.get("author", "")
        if queryFeed(db, table, title, date, author) < 0:
            content = getContent(post.link, cookies, selector)
            insertFeed(db, table, title, date, author, content)


def updateFeed(db, table):
    """Log in with the ACCOUNT_* constants and store new FT world entries."""
    cookies = getSession(
        ACCOUNT_USERNAME, ACCOUNT_PASSWORD, "https://accounts.ft.com/login")
    if not verifySession(
            cookies, ACCOUNT_USERNAME, "https://www.ft.com/myaccount"):
        print("Session couldnt be verified")
        return
    # NOTE(review): the selector ends in "content-bod"; it may be a
    # truncated "content-body" -- confirm against the page markup.
    _storeNewEntries(
        db, table, cookies,
        "https://www.ft.com/world?format=rss",
        "img.n-image,div.article__content-body.n-content-body.js-article__content-bod")


def updateFeedFromDb(db, table, feed):
    """Same as updateFeed, with all settings taken from the lf object *feed*."""
    cookies = getSession(
        feed.username, feed.password, feed.selector_login_url)
    if not verifySession(
            cookies, feed.username, feed.selector_login_verify_url):
        print("Session couldnt be verified")
        return
    _storeNewEntries(db, table, cookies, feed.url, feed.selector)


def main():
    """Create the tables, fetch the feed once, then keep the process alive."""
    db = getDb()
    createTableMa(db, "feeds")
    if not createTableIm(db, "feed01"):
        die()
    updateFeed(db, "feed01")
    # TODO: schedule periodic refreshes, e.g.
    # Periodic(60 * 60, updateFeed, db, "feed01"); until then the loop
    # below only keeps the process running.
    while True:
        time.sleep(5)


class lf:
    """Configuration of one feed, backed by a row of the ``feeds`` table."""

    # (attribute name, column name) pairs.  The credential columns are
    # called ``pass`` and ``user`` in the schema createTableMa creates.
    _FIELDS = (
        ("url", "url"),
        ("password", "pass"),
        ("username", "user"),
        ("selector", "selector"),
        ("selector_login_user", "selector_login_user"),
        ("selector_login_pass", "selector_login_pass"),
        ("selector_login_verify", "selector_login_verify"),
        ("selector_login_url", "selector_login_url"),
        ("selector_login_verify_url", "selector_login_verify_url"),
    )

    def __init__(self, db, name):
        """Bind to connection *db* and load the row called *name*."""
        self.name = name
        self.db = db
        self.url = ""
        self.password = ""
        self.username = ""
        self.selector = ""
        self.selector_login_user = ""
        self.selector_login_pass = ""
        self.selector_login_verify = ""
        self.selector_login_url = ""
        self.selector_login_verify_url = ""
        self.init_from_db()

    def _column(self, string):
        """Map an attribute or column name to its ``feeds`` column name.

        Raises ValueError for names that are not part of the table.
        """
        for attribute, column in self._FIELDS:
            if string in (attribute, column):
                return column
        if string in ("id", "name"):
            return string
        raise ValueError("Unknown feeds column: {!r}".format(string))

    def init_from_db(self):
        """Load every field from the database; missing values become ''."""
        for attribute, _column in self._FIELDS:
            setattr(self, attribute, self.query_db(attribute))

    def init_from_scratch(
            self,
            name, url, password, username, selector,
            selector_login_user, selector_login_pass,
            selector_login_verify, selector_login_verify_url,
            selector_login_url):
        """Set every field from the arguments without touching the database."""
        self.name = name
        self.url = url
        self.password = password
        self.username = username
        self.selector = selector
        self.selector_login_user = selector_login_user
        self.selector_login_pass = selector_login_pass
        self.selector_login_verify = selector_login_verify
        self.selector_login_url = selector_login_url
        self.selector_login_verify_url = selector_login_verify_url

    def query_db(self, string):
        """Return field *string* of this feed's row.

        Returns '' when the row does not exist or the stored value is NULL.
        """
        column = self._column(string)
        cursor = self.db.cursor()
        try:
            # LIMIT 1 so the single fetchone() consumes the whole result.
            cursor.execute(
                "SELECT `{}` FROM feeds WHERE name = %s "
                "LIMIT 1".format(column),
                (self.name,))
            result = cursor.fetchone()
        finally:
            cursor.close()
        if result is None or result[0] is None:
            return ""
        return result[0]

    def update_db(self, string, value):
        """Set field *string* of this feed's row to *value* and commit."""
        column = self._column(string)
        cursor = self.db.cursor()
        try:
            cursor.execute(
                "UPDATE feeds SET `{}` = %s WHERE name = %s".format(column),
                (value, self.name))
            self.db.commit()
        finally:
            cursor.close()

    def write_db(self):
        """Insert this feed's row, or update it if the name already exists."""
        columns = [column for _attribute, column in self._FIELDS]
        values = [getattr(self, attribute)
                  for attribute, _column in self._FIELDS]
        cursor = self.db.cursor()
        try:
            cursor.execute(
                "SELECT id FROM feeds WHERE name = %s LIMIT 1",
                (self.name,))
            if cursor.fetchone() is None:
                names = ", ".join(
                    "`{}`".format(column) for column in ["name"] + columns)
                marks = ", ".join(["%s"] * (len(columns) + 1))
                cursor.execute(
                    "INSERT INTO feeds ({}) VALUES ({})".format(names, marks),
                    tuple([self.name] + values))
            else:
                assignments = ", ".join(
                    "`{}` = %s".format(column) for column in columns)
                cursor.execute(
                    "UPDATE feeds SET {} WHERE name = %s".format(assignments),
                    tuple(values + [self.name]))
            self.db.commit()
        finally:
            cursor.close()


class Periodic(object):
    """
    A periodic task running in threading.Timers
    """

    def __init__(self, interval, function, *args, **kwargs):
        """Call ``function(*args, **kwargs)`` every *interval* seconds.

        The keyword ``autostart`` (default True) is consumed here and not
        forwarded to *function*; when true the first timer starts at once.
        """
        autostart = kwargs.pop('autostart', True)
        self._lock = Lock()
        self._timer = None
        self.function = function
        self.interval = interval
        self.args = args
        self.kwargs = kwargs
        self._stopped = True
        if autostart:
            self.start()

    def start(self, from_run=False):
        """Schedule the next run.

        Called by users to start a stopped task and by _run
        (``from_run=True``) to reschedule a running one.
        """
        with self._lock:
            if from_run:
                # A run that began before stop() must not reschedule.
                if self._stopped:
                    return
            elif not self._stopped:
                return
            self._stopped = False
            self._timer = Timer(self.interval, self._run)
            self._timer.start()

    def _run(self):
        """Timer callback: schedule the next run, then call the function."""
        self.start(from_run=True)
        self.function(*self.args, **self.kwargs)

    def stop(self):
        """Cancel the pending timer, if any, and prevent rescheduling."""
        with self._lock:
            self._stopped = True
            if self._timer is not None:
                self._timer.cancel()


if __name__ == "__main__":
    main()