Save groupchat messages in an archive, allow bulk history requesting

- WARNING: This change is hilariously unsuitable for public instances. Don't run
  this code in such an environment yet!
- Groupchat messages are now stored in the sqlite3 database, with the intent
  to allow retrieval via MAM at a later date.
  FIXME: You can't opt out of this though, which is a huge GDPR hole.
- You can also request ALL of your group chat history from whatsapp be fetched
  and stored in the database (!!). This is a VERY resource-intensive operation,
  as it involves spawning a metric crapton of threads, uploading a metric
  crapton of historical media to the configured upload server, and writing a
  metric crapton of messages into the database.
  - At some point, the ability to invoke this will be severely limited to only
    approved users. That point has not yet come, though.
  - Additionally, the chat history request will abort if the connection it's
    associated with dies. (You can just retry it, though.)
This commit is contained in:
eta 2020-09-26 19:49:26 +01:00
parent e7c84623e8
commit f979abbd35
4 changed files with 96 additions and 1 deletions

22
db.lisp
View file

@ -141,3 +141,25 @@
(loop
while (sqlite:step-statement get-stmt)
collect (with-bound-columns (localpart subject) get-stmt (cons localpart subject)))))
(defun insert-xmpp-message (xm)
"Inserts XM, a groupchat XMPP-MESSAGE, into the database."
(assert (uiop:string-prefix-p "g" (conversation xm)) () "Tried to insert XMPP message for non-groupchat conversation ~A" (conversation xm))
(let ((chat-id (or
(get-user-chat-id (uid xm) (conversation xm))
(error "Couldn't find chat id for conversation ~A / uid ~A"
(conversation xm) (uid xm))))
(ts-unix (local-time:timestamp-to-unix (timestamp xm))))
(with-prepared-statements
((insert-stmt "INSERT INTO user_chat_history (user_id, chat_id, user_from, ts_unix, xmpp_id, orig_id, body, oob_url) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"))
(bind-parameters insert-stmt (1 (uid xm)) (2 chat-id) (3 (from xm)) (4 ts-unix) (5 (xmpp-id xm)) (6 (orig-id xm)) (7 (body xm)) (8 (oob-url xm)))
(sqlite:step-statement insert-stmt))))
(defun lookup-wa-msgid-in-history (uid wa-msgid)
"Look up the XMPP ID for the WhatsApp message ID WA-MSGID, when received for the user UID."
(with-prepared-statements
((get-stmt "SELECT xmpp_id FROM user_chat_history WHERE user_id = ? AND orig_id = ?"))
(bind-parameters get-stmt uid wa-msgid)
(when (sqlite:step-statement get-stmt)
(with-bound-columns (xid) get-stmt
xid))))

View file

@ -112,6 +112,7 @@ FIXME: the above behaviour is a bit meh."
do (format oss "> ~A~%" item))
(get-output-stream-string oss)))
(defun deliver-xmpp-message (comp msg)
"Deliver MSG, an XMPP-MESSAGE, to the intended destinations on COMP."
(let* ((jid (get-user-jid (uid msg)))
@ -146,7 +147,8 @@ FIXME: the above behaviour is a bit meh."
(unless one-to-one-p
(cxml:with-element "stanza-id"
(cxml:attribute "xmlns" +unique-stanzas-ns+)
(cxml:attribute "id" (xmpp-id msg)))
(cxml:attribute "id" (xmpp-id msg))
(cxml:attribute "by" (concatenate 'string (conversation msg) "@" component-host)))
(when (orig-id msg)
(cxml:with-element "origin-id"
(cxml:attribute "xmlns" +unique-stanzas-ns+)

View file

@ -60,3 +60,15 @@ CREATE TABLE user_chat_joined (
chat_id INT NOT NULL REFERENCES user_chats,
jid VARCHAR NOT NULL
);
CREATE TABLE user_chat_history (
id INTEGER PRIMARY KEY,
user_id INT NOT NULL REFERENCES users,
chat_id INT NOT NULL REFERENCES user_chats,
user_from VARCHAR NOT NULL,
ts_unix INT NOT NULL,
xmpp_id VARCHAR NOT NULL,
orig_id VARCHAR,
body VARCHAR NOT NULL,
oob_url VARCHAR
);

View file

@ -272,6 +272,40 @@ WhatsXMPP represents users as u440123456789 and groups as g1234-5678."
(admin-presence comp jid "Programming error" "xa")
(remhash jid (component-whatsapps comp))))
(defun do-chat-history-request (comp conn jid uid requested-jid)
"Retrieves full chat history for the REQUESTED-JID, and inserts it into the database."
(whatscl::get-full-chat-history
conn requested-jid
(lambda (conn chat-history)
(with-wa-handler-context (comp conn jid)
(bt:make-thread
(lambda ()
(let ((num-inserted 0))
(loop
for msg in chat-history
do (unless (lookup-wa-msgid-in-history uid (whatscl::message-id msg))
(incf num-inserted)
(catcher
(attach (with-component-data-lock (comp)
(make-xmpp-messages-for-wa-message comp conn jid msg))
(lambda (messages)
(with-component-data-lock (comp)
(loop
for x-msg in messages
do (progn
(insert-xmpp-message x-msg)
(incf num-inserted))))))
(error (e)
(warn "Couldn't insert chat history message: ~A" e)))))
(with-component-data-lock (comp)
(admin-msg comp jid (format nil "Inserted ~A (of ~A) chat history messages for ~A." num-inserted (length chat-history) requested-jid)))))
:name (format nil "chat history insertion thread for ~A / ~A" jid requested-jid))
(if (eql (length chat-history) 0)
(admin-msg comp jid (format nil "Warning: chat history request for ~A errored, or returned no results." requested-jid))
(admin-msg comp jid (format nil "Inserting ~A chat history messages for ~A..." (length chat-history) requested-jid)))))
:sleep-secs 0.2
:chunk-size 100))
(defun wa-handle-message (comp conn jid msg delivery-type)
(declare (ignore delivery-type))
(with-wa-handler-context (comp conn jid)
@ -287,6 +321,8 @@ WhatsXMPP represents users as u440123456789 and groups as g1234-5678."
for x-msg in messages
do (progn
(deliver-xmpp-message comp x-msg)
(when (uiop:string-prefix-p "g" (conversation x-msg))
(insert-xmpp-message x-msg))
(when (orig-id x-msg)
(insert-user-message uid (xmpp-id x-msg) (orig-id x-msg))))))))
(error (e)
@ -565,6 +601,13 @@ Returns three values: avatar data (as two values), and a generalized boolean spe
(let* ((new-from (concatenate 'string orig-to "/" muc-resource))
(group-localpart (nth-value 1 (parse-jid orig-to)))
(recipients (get-user-chat-joined (get-user-id jid) group-localpart)))
(insert-xmpp-message (make-instance 'xmpp-message
:conversation group-localpart
:uid (get-user-id jid)
:from muc-resource
:timestamp (local-time:now)
:xmpp-id orig-id
:body orig-body))
(loop
for recip in recipients
do (with-message (comp recip :from new-from :id orig-id :type "groupchat")
@ -858,6 +901,22 @@ Returns three values: avatar data (as two values), and a generalized boolean spe
(reply "WhatsApp connections disabled."))
(when conn
(whatscl::close-connection conn))))
((equal body "full-history-fetch")
(let ((conn (gethash stripped (component-whatsapps comp))))
(if conn
(let ((chats (get-user-groupchats uid)))
(reply (format nil "Fetching full chat history for ~A groupchats. This will probably take a long time.~%Note that even after completion is reported, some background media uploading may be in progress."
(length chats)))
(bt:make-thread
(lambda ()
(loop
for (localpart . subject) in chats
do (progn
(with-wa-handler-context (comp conn stripped)
(do-chat-history-request comp conn stripped uid (whatsxmpp-localpart-to-wa-jid localpart)))
(sleep 0.1))))
:name "whatsxmpp chat history fetcher"))
(reply "You're not connected to WhatsApp."))))
((equal body "refresh-chats")
(let ((conn (gethash stripped (component-whatsapps comp))))
(if conn