386 lines
12 KiB
Python
Executable File
386 lines
12 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
|
|
import os
import sys
import time
from io import StringIO
from threading import Timer, Lock

import feedparser
import lxml.etree as etree
import mysql.connector
import requests as req
import selenium
from cssselect import HTMLTranslator, SelectorError
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

from spline import gcv,bspline
|
|
#import web
|
|
|
|
# Connection settings and account credentials.  Each value may be overridden
# through an environment variable so that real secrets do not have to be
# committed with the source; the fallbacks are the original placeholder values.
DB_HOST = os.environ.get("FEED_DB_HOST", "127.0.0.1")
DB_USER = os.environ.get("FEED_DB_USER", "insecure")
DB_PASSWD = os.environ.get("FEED_DB_PASSWD", "insecure")
DB_DATABASE = os.environ.get("FEED_DB_DATABASE", "insecure")
ACCOUNT_USERNAME = os.environ.get("FEED_ACCOUNT_USERNAME", "foooo@google.com")
ACCOUNT_PASSWORD = os.environ.get("FEED_ACCOUNT_PASSWORD", "foorg53")
|
|
|
|
|
|
def getDb():
    """Open and return a new MySQL connection built from the DB_* settings."""
    settings = {
        "host": DB_HOST,
        "user": DB_USER,
        "passwd": DB_PASSWD,
        "database": DB_DATABASE,
    }
    return mysql.connector.connect(**settings)
|
|
|
|
def tableExist(db, name):
    """Return True if a table called *name* exists in the current database.

    Args:
        db: An open DB-API connection (``cursor()`` is the only method used).
        name: Table name to look for.

    Returns:
        bool: True when ``information_schema.tables`` lists the table for
        the schema the connection is currently using, otherwise False.
    """
    cursor = db.cursor()
    try:
        # The name is bound as a parameter rather than formatted into the SQL,
        # and the lookup is limited to the active schema so a same-named table
        # in another database cannot inflate the count.
        cursor.execute(
            "SELECT COUNT(*) FROM information_schema.tables "
            "WHERE table_schema = DATABASE() AND table_name = %s",
            (name,),
        )
        row = cursor.fetchone()
    finally:
        # Close on every path; the original leaked the cursor when the table
        # was missing.
        cursor.close()
    return row is not None and row[0] >= 1
|
|
|
|
def createTable(db, name):
    """Create the article table *name* with the columns the feed code uses.

    The table gets ``id``, ``title``, ``date``, ``author`` and ``content``.
    ``date`` was missing from the original schema although both
    ``insertFeed`` and ``queryFeed`` reference it.

    Args:
        db: An open DB-API connection.
        name: Name of the table to create.
    """
    # Identifiers cannot be bound as query parameters, so the name is quoted
    # with backticks and any embedded backtick is doubled.
    quoted = "`{0}`".format(str(name).replace("`", "``"))
    cursor = db.cursor()
    try:
        cursor.execute("""
            CREATE TABLE {0}
            (id INT AUTO_INCREMENT PRIMARY KEY,
            title VARCHAR(255),
            date VARCHAR(255),
            author VARCHAR(255),
            content TEXT(65535))
        """.format(quoted))
    finally:
        cursor.close()
|
|
|
|
def createTableMa(db, name):
    """Create the feed-configuration table *name* if it does not exist yet.

    The credential columns are named ``password`` and ``username`` so they
    match the columns class ``lf`` selects, inserts and updates; the original
    schema called them ``pass`` and ``user``.

    NOTE(review): because of ``IF NOT EXISTS`` a table created with the old
    column names is left untouched and would need a manual migration.

    Args:
        db: An open DB-API connection.
        name: Name of the configuration table.
    """
    # Identifiers cannot be bound as parameters; quote and escape instead.
    quoted = "`{0}`".format(str(name).replace("`", "``"))
    cursor = db.cursor()
    try:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS {0}
            (id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            url VARCHAR(255),
            password VARCHAR(255),
            username VARCHAR(255),
            selector VARCHAR(255),
            selector_login_user VARCHAR(255),
            selector_login_pass VARCHAR(255),
            selector_login_verify VARCHAR(255),
            selector_login_url VARCHAR(255),
            selector_login_verify_url VARCHAR(255))
        """.format(quoted))
    finally:
        cursor.close()
|
|
|
|
def createTableIm(db, name):
    """Ensure the article table *name* exists, creating it when necessary.

    Args:
        db: An open DB-API connection.
        name: Name of the article table.

    Returns:
        A truthy value when the table exists after the call, a falsy one
        otherwise.
    """
    if tableExist(db, name):
        return True
    createTable(db, name)
    # Re-check once and return the result.  The original recursed without
    # `return`, so a freshly created table produced None (which main() treats
    # as failure) and a failing CREATE would have recursed without bound.
    return tableExist(db, name)
|
|
|
|
def die():
    """Abort the program by raising SystemExit with no exit status."""
    raise SystemExit()
|
|
|
|
def insertFeed(db, table, title, date, author, content):
    """Insert one article row into *table* and commit it.

    Args:
        db: An open DB-API connection (``cursor()`` and ``commit()`` are used).
        table: Name of the article table.
        title: Article title.
        date: Article date, stored as given.
        author: Article author.
        content: Article body markup/text.
    """
    # The table name cannot be a bound parameter; quote and escape it.
    quoted = "`{0}`".format(str(table).replace("`", "``"))
    cursor = db.cursor()
    try:
        # Values are bound as parameters: the original formatted them into the
        # statement unquoted, which produced invalid SQL for any real title and
        # allowed injection from feed content.
        cursor.execute(
            "INSERT INTO {0} (title, date, author, content) "
            "VALUES (%s, %s, %s, %s)".format(quoted),
            (title, date, author, content),
        )
    finally:
        cursor.close()
    # Without an explicit commit the row is lost when autocommit is off.
    db.commit()
|
|
|
|
def queryFeed(db, table, title, date, author):
    """Look up the id of the article with the given title and date.

    Args:
        db: An open DB-API connection.
        table: Name of the article table.
        title: Article title to match.
        date: Article date to match.
        author: Accepted for signature compatibility; not part of the lookup.

    Returns:
        int: The row id of a matching article, or -1 when none is stored.
    """
    # The table name cannot be a bound parameter; quote and escape it.
    quoted = "`{0}`".format(str(table).replace("`", "``"))
    cursor = db.cursor()
    try:
        # Fixes the `tistle` column typo and binds the values as parameters.
        # LIMIT 1 keeps the result set to the single row that is fetched.
        cursor.execute(
            "SELECT id FROM {0} WHERE title = %s AND date = %s LIMIT 1".format(quoted),
            (title, date),
        )
        row = cursor.fetchone()
    finally:
        cursor.close()
    if row is None:
        return -1
    # fetchone() yields a row tuple; the id is its first column.
    return int(row[0])
|
|
|
|
def queryRss(url, rss):
    """Parse the feed at *url* with feedparser when *rss* is truthy.

    Returns the parsed feed object, or None when *rss* is falsy.
    """
    if not rss:
        return None
    return feedparser.parse(url)
|
|
|
|
def getContent(entryUrl, session_cookies, selector):
    """Download *entryUrl* and return the markup matched by a CSS selector.

    Args:
        entryUrl: URL of the article page.
        session_cookies: Cookies to send, either a name -> value mapping or a
            list of cookie dicts with ``name``/``value`` keys as returned by
            Selenium's ``get_cookies()``.
        selector: CSS selector (may be a comma-separated group) choosing the
            elements to keep.

    Returns:
        str: The serialized HTML of every matching element, concatenated in
        document order; an empty string when nothing matches.

    Terminates the program through ``die()`` when the response status is
    not 200.
    """
    # requests wants a name -> value mapping; accept Selenium's list form too.
    if isinstance(session_cookies, (list, tuple)):
        session_cookies = {c['name']: c['value'] for c in session_cookies}

    # A timeout keeps one stalled server from blocking the updater forever.
    r = req.get(entryUrl, cookies=session_cookies, timeout=30)
    print(r.status_code)
    if r.status_code != 200:
        print("Err fetching: " + entryUrl)
        die()

    xpath = HTMLTranslator().css_to_xpath(selector)
    tree = etree.parse(StringIO(r.text), etree.HTMLParser())
    matches = tree.xpath(xpath)

    # xpath() returns a list of elements, which has no tostring() method;
    # serialize each element and join the pieces.
    return "".join(
        etree.tostring(node, encoding="unicode", method="html", with_tail=False)
        for node in matches
    )
|
|
|
|
#p1 = (23.,23.)
|
|
def move_mouse(p1, p2, driver, time_=1000, n=400, click=False):
    """Move the pointer from *p1* to *p2* along a spline, optionally clicking.

    Args:
        p1: (x, y) start point; the pointer is assumed to be here already.
        p2: (x, y) end point.
        driver: Selenium WebDriver whose pointer is moved.
        time_: Total duration of the movement in milliseconds.
        n: Number of spline samples requested from ``bspline``.
        click: When true, click once after the last move.

    NOTE(review): the shape of the ``bspline``/``gcv`` output is not visible
    here; it is treated as an iterable of indexable (x, y) pairs, as the
    original loop did.
    """
    points = bspline(gcv(p1, p2, 12), degree=5, n=n)
    # Float division, and no ZeroDivisionError when n is 0.
    pause = time_ / (1000.0 * n) if n else 0

    # move_by_offset() is relative to the current pointer position while the
    # spline points are absolute, so each step is sent as a delta from the
    # last position reached.  Tracking integer positions avoids rounding drift.
    cur_x, cur_y = int(p1[0]), int(p1[1])
    for point in points:
        if point[0] < 0 or point[1] < 0:
            continue
        target_x, target_y = int(point[0]), int(point[1])
        dx, dy = target_x - cur_x, target_y - cur_y
        if dx or dy:
            # A fresh chain per step: re-performing one shared chain would
            # replay every previously queued move.
            ActionChains(driver).move_by_offset(dx, dy).perform()
            cur_x, cur_y = target_x, target_y
        time.sleep(pause)

    if click:
        # The original queued the click but never performed it.
        ActionChains(driver).click().perform()
|
|
|
|
def get_loc(e):
    """Return the (x, y) point one third of the way into element *e*.

    The point is the element's ``location`` plus a third of its ``size``
    along each axis.
    """
    x = e.location['x'] + (e.size['width'] / 3)
    y = e.location['y'] + (e.size['height'] / 3)
    return (x, y)
|
|
|
|
def getSession(user, password, url="https://accounts.ft.com/login"):
    """Log in through a Chrome browser and return the session cookies.

    Args:
        user: Account e-mail address typed into the login form.
        password: Account password typed into the login form.
        url: Address of the login page.

    Returns:
        dict: Cookie name -> cookie value for every cookie the browser holds
        after the login attempt, in the form ``requests`` accepts.
    """
    driver = selenium.webdriver.Chrome()
    try:
        driver.set_window_position(0, 0)
        driver.set_window_size(3840, 1920)
        driver.get(url)

        email = driver.find_element_by_id("enter-email")
        pos1 = get_loc(email)
        move_mouse((123.3, 334.2), pos1, driver, n=200)
        email.send_keys(user)

        time.sleep(2)

        email_submit = driver.find_element_by_id("enter-email-next")
        # Was `get_loc(email,submit)`, which raised NameError.
        pos2 = get_loc(email_submit)
        move_mouse(pos1, pos2, driver, n=50)
        email_submit.submit()

        time.sleep(3)

        # Keep the element itself: the original bound the None result of
        # send_keys() and then typed the password a second time.
        password_el = driver.find_element_by_id("enter-password")
        pos3 = get_loc(password_el)
        move_mouse(pos2, pos3, driver, n=50)
        password_el.send_keys(password)

        time.sleep(5)

        button = driver.find_element_by_css_selector(".o-buttons--primary.o-buttons--big.main-button")
        pos4 = get_loc(button)
        move_mouse(pos3, pos4, driver, n=50)
        # Click the element directly so the submit does not depend on where
        # the simulated pointer ended up.
        button.click()
        time.sleep(20)

        # Return the name -> value mapping.  The original called die() here,
        # so it never returned, and it printed the session cookies.
        return {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
    finally:
        # Always shut the browser down, also when an element lookup fails.
        driver.quit()
|
|
|
|
#url: https://www.ft.com/myaccount
|
|
|
|
def _cookie_list(session):
    """Normalize *session* into a list of ``{'name', 'value'}`` cookie dicts."""
    if isinstance(session, dict):
        if 'name' in session and 'value' in session:
            # A single Selenium-style cookie dict.
            return [{'name': session['name'], 'value': session['value']}]
        # A name -> value mapping.
        return [{'name': key, 'value': val} for key, val in session.items()]
    # An iterable of Selenium-style cookie dicts.
    return [{'name': c['name'], 'value': c['value']} for c in session]


def verifySession(session, user, url="https://www.ft.com/myaccount"):
    """Check in a fresh browser that *session* is logged in as *user*.

    Args:
        session: Session cookies as a name -> value mapping, a single cookie
            dict with ``name``/``value`` keys, or a list of such dicts.
        user: Text expected inside the account page's e-mail element.
        url: Account page that shows the logged-in e-mail address.

    Returns:
        bool: True when an element with id ``rightRailEmailAddress`` contains
        *user*, otherwise False.
    """
    cookies = _cookie_list(session)
    driver = selenium.webdriver.Chrome()
    try:
        # Load the site before adding cookies so they attach to its domain,
        # then load it again so the request carries them.
        driver.get(url)
        for cookie in cookies:
            driver.add_cookie(cookie)
        driver.get(url)

        for element in driver.find_elements_by_id("rightRailEmailAddress"):
            # str.find() returns -1 (never None) on a miss, so the original
            # `is not None` test accepted every element.
            if user in element.text:
                return True
        return False
    finally:
        driver.quit()
|
|
|
|
def updateFeed(db, table):
    """Fetch the FT world feed and store every article not yet in *table*.

    Logs in with the module-level account, verifies the session, then for
    each feed entry that ``queryFeed`` does not find, downloads the article
    body and inserts it.

    Args:
        db: An open DB-API connection.
        table: Name of the article table.
    """
    cookies = getSession(ACCOUNT_USERNAME, ACCOUNT_PASSWORD, "https://accounts.ft.com/login")
    if not verifySession(cookies, ACCOUNT_USERNAME, "https://www.ft.com/myaccount"):
        print("Session couldnt be verified")
        return
    feed = queryRss("https://www.ft.com/world?format=rss", True)
    for post in feed.entries:
        # Entries are dict-like and need not carry every field; attribute
        # access (post.created, post.author) raised AttributeError when one
        # was missing.
        link = post.get("link")
        if not link:
            continue
        title = post.get("title", "")
        date = post.get("created") or post.get("published", "")
        author = post.get("author", "")
        if queryFeed(db, table, title, date, author) < 0:
            # NOTE(review): the last class in this selector ends in
            # "content-bod"; it looks truncated -- confirm against the page.
            content = getContent(link, cookies, "img.n-image,div.article__content-body.n-content-body.js-article__content-bod")
            insertFeed(db, table, title, date, author, content)
|
|
|
|
def updateFeedFromDb(db, table, feed):
    """Fetch the feed described by *feed* and store its new articles.

    Args:
        db: An open DB-API connection.
        table: Name of the article table.
        feed: Feed configuration object providing ``username``, ``password``,
            ``url``, ``selector``, ``selector_login_url`` and
            ``selector_login_verify_url``.
    """
    cookies = getSession(feed.username, feed.password, feed.selector_login_url)
    # The attribute is `selector_login_verify_url`; `selector_verify_url`
    # does not exist on the configuration object.
    if not verifySession(cookies, feed.username, feed.selector_login_verify_url):
        print("Session couldnt be verified")
        return
    # Keep the configuration in `feed`: the original rebound `feed` to the
    # parsed RSS and then read `feed.selector` from the wrong object.
    parsed = queryRss(feed.url, True)
    for post in parsed.entries:
        # Entries are dict-like and need not carry every field.
        link = post.get("link")
        if not link:
            continue
        title = post.get("title", "")
        date = post.get("created") or post.get("published", "")
        author = post.get("author", "")
        if queryFeed(db, table, title, date, author) < 0:
            content = getContent(link, cookies, feed.selector)
            insertFeed(db, table, title, date, author, content)
|
|
|
|
|
|
|
|
def main():
    """Prepare the tables, run one feed update, then idle forever.

    Exits through ``die()`` when the article table cannot be ensured.
    """
    db = getDb()
    createTableMa(db, "feeds")
    table_ready = createTableIm(db, "feed01")
    if not table_ready:
        die()
    updateFeed(db, "feed01")
    # Hourly re-run is currently disabled:
    #rt = Periodic(60*60*1, updateFeed, [db, "feed01"])
    while True:
        time.sleep(5)
|
|
|
|
|
|
# Script entry point: start the updater only when executed directly.
# NOTE(review): this guard sits above the `lf` and `Periodic` class
# definitions, and main() never returns, so those classes are not defined
# when the file is run as a script.
if __name__ == "__main__":
    main()
|
|
|
|
class lf:
    """Configuration of one feed, backed by a row of the ``feeds`` table.

    The row is identified by ``name``; every other column is mirrored in an
    instance attribute of the same name.
    """

    # Columns of ``feeds`` that map one-to-one onto instance attributes.
    # Also used as a whitelist, because column names cannot be bound as
    # query parameters.
    _COLUMNS = (
        "url",
        "password",
        "username",
        "selector",
        "selector_login_user",
        "selector_login_pass",
        "selector_login_verify",
        "selector_login_url",
        "selector_login_verify_url",
    )

    # Marks "no value given" in query_db(), where None could be a real value.
    _UNSET = object()

    def __init__(self, db, name):
        """Bind to connection *db* and load the row called *name*.

        Attributes with no stored value are left as empty strings.
        """
        self.name = name
        self.db = db
        for column in self._COLUMNS:
            setattr(self, column, "")
        self.init_from_db()

    def init_from_db(self):
        """Reload every mirrored attribute from the database row."""
        for column in self._COLUMNS:
            setattr(self, column, self.query_db(column))

    def init_from_scratch(
            self,
            name,
            url,
            password,
            username,
            selector,
            selector_login_user,
            selector_login_pass,
            selector_login_verify,
            selector_login_verify_url,
            selector_login_url):
        """Set every attribute from the arguments without touching the database.

        Note the positional order: ``selector_login_verify_url`` comes before
        ``selector_login_url``.
        """
        self.name = name
        self.url = url
        self.password = password
        self.username = username
        self.selector = selector
        self.selector_login_user = selector_login_user
        self.selector_login_pass = selector_login_pass
        self.selector_login_verify = selector_login_verify
        self.selector_login_url = selector_login_url
        self.selector_login_verify_url = selector_login_verify_url

    def _check_column(self, column):
        """Raise ValueError unless *column* is a known ``feeds`` column."""
        if column not in self._COLUMNS:
            raise ValueError("unknown feeds column: {0!r}".format(column))

    def query_db(self, string, value=_UNSET):
        """Read column *string* of this feed's row, or write it when *value* is given.

        The original defined ``query_db`` twice, so the two-argument writer
        shadowed the one-argument reader and ``init_from_db`` raised
        TypeError.  Both call shapes are supported here; prefer ``update_db``
        for writes.

        Returns:
            The stored value, or ``""`` when the row is missing or the column
            is NULL (read form); None (write form).

        Raises:
            ValueError: If *string* is not a known column.
        """
        if value is not self._UNSET:
            return self.update_db(string, value)
        self._check_column(string)
        cursor = self.db.cursor()
        try:
            cursor.execute(
                "SELECT {0} FROM feeds WHERE name = %s LIMIT 1".format(string),
                (self.name,),
            )
            row = cursor.fetchone()
        finally:
            cursor.close()
        # fetchone() yields a row tuple; hand back the single column value.
        if row is None or row[0] is None:
            return ""
        return row[0]

    def update_db(self, string, value):
        """Store *value* in column *string* of this feed's row and commit.

        Raises:
            ValueError: If *string* is not a known column.
        """
        self._check_column(string)
        cursor = self.db.cursor()
        try:
            # Was `UPDATE SET ... FROM feeds`, which is not valid SQL.
            cursor.execute(
                "UPDATE feeds SET {0} = %s WHERE name = %s".format(string),
                (value, self.name),
            )
        finally:
            cursor.close()
        self.db.commit()

    def write_db(self):
        """Insert this feed's row, or update it when the name already exists."""
        values = [getattr(self, column) for column in self._COLUMNS]
        cursor = self.db.cursor()
        try:
            cursor.execute("SELECT id FROM feeds WHERE name = %s LIMIT 1", (self.name,))
            exists = cursor.fetchone() is not None
            if exists:
                assignments = ", ".join("{0} = %s".format(c) for c in self._COLUMNS)
                # The WHERE clause limits the update to this feed's row.
                cursor.execute(
                    "UPDATE feeds SET {0} WHERE name = %s".format(assignments),
                    values + [self.name],
                )
            else:
                columns = ", ".join(("name",) + self._COLUMNS)
                marks = ", ".join(["%s"] * (len(self._COLUMNS) + 1))
                cursor.execute(
                    "INSERT INTO feeds ({0}) VALUES ({1})".format(columns, marks),
                    [self.name] + values,
                )
        finally:
            cursor.close()
        self.db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
class Periodic(object):
    """
    A periodic task running in threading.Timers

    ``function(*args, **kwargs)`` is called every ``interval`` seconds on a
    timer thread until ``stop()`` is called.  The next run is scheduled
    before the function is invoked.
    """

    def __init__(self, interval, function, *args, **kwargs):
        """Set up the task.

        Args:
            interval: Seconds between runs.
            function: Callable to invoke.
            *args: Positional arguments passed to *function*.
            **kwargs: Keyword arguments passed to *function*.  The key
                ``autostart`` (default True) is consumed here and decides
                whether the first timer is started immediately.
        """
        autostart = kwargs.pop('autostart', True)
        self._lock = Lock()
        self._timer = None
        self.function = function
        self.interval = interval
        self.args = args
        self.kwargs = kwargs
        self._stopped = True
        if autostart:
            self.start()

    def _schedule(self):
        """Arm a new timer for the next run; the caller must hold the lock."""
        self._stopped = False
        self._timer = Timer(self.interval, self._run)
        self._timer.start()

    def start(self, from_run=False):
        """Start the task if it is stopped (or unconditionally with *from_run*)."""
        # `with` releases the lock even if arming the timer raises.
        with self._lock:
            if from_run or self._stopped:
                self._schedule()

    def _run(self):
        """Timer callback: schedule the next run, then call the function."""
        with self._lock:
            # stop() may have been called after this timer fired but before
            # the lock was taken; honour it instead of rescheduling.
            if self._stopped:
                return
            self._schedule()
        self.function(*self.args, **self.kwargs)

    def stop(self):
        """Stop the task and cancel the pending timer, if any."""
        with self._lock:
            self._stopped = True
            # No timer exists yet when stop() precedes the first start().
            if self._timer is not None:
                self._timer.cancel()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|