launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #30392
[Merge] ~ines-almeida/gdpro:update-login-url into gdpro:master
Ines Almeida has proposed merging ~ines-almeida/gdpro:update-login-url into gdpro:master.
Commit message:
Update OAuth URL to login directly to the canonical domain
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ines-almeida/gdpro/+git/gdpro/+merge/448998
Clicking the login link now logs in directly to the canonical domain, instead of requiring the user to select "Use Custom Domain", enter "canonical" in the textbox, and select "Continue".
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ines-almeida/gdpro:update-login-url into gdpro:master.
diff --git a/README b/README
new file mode 100644
index 0000000..c2c22ce
--- /dev/null
+++ b/README
@@ -0,0 +1,69 @@
+<<<<<<< README
+=======
+This script automates the GDPR process for Store and Launchpad services.
+
+Currently supports SSO, Landscape and Launchpad.
+
+The list of GDPR reports/deletions is obtained from Salesforce.
+
+ * Looks at pending tasks on the list; there will be one per supported service
+ (Landscape, SSO, Launchpad)
+ * For each task, extracts the email(s) and:
+ * Does a lookup by email and gets the report.
+ * If there was a report, then it creates a google doc with the report text
+ and proper sharing permissions and adds it to the Google Drive folder
+ retrieved from the Salesforce "Legal" associated to the task.
+ * A message ("chatter") is added to the "Legal" record indicating either
+ data found or no data held.
+ * Task is marked as completed.
+
+
+Do Salesforce interaction, get needed lookup data
+Pluggable, configurable lookup modules for diverse systems
+Google doc creation / adding to card / folder (one document per system)
+
+
+== TO RUN IT ==
+
+ * Install the requirements.txt and activate the venv
+ * Create a creds.sh file with some credentials for the supported services,
+ Salesforce and Trello and some IDs for the Trello board, list and usernames
+ of the humans you're automating.
+ * Obtain a credentials.json OAuth2 file from Google API with the Drive v3 and
+ Docs v1 APIs enabled.
+ * Source creds.sh
+ * Run sar-reports.py; it asks before each card, so each one can be verified manually.
+ * By default `sar-reports.py` will run for SSO and Landscape. You can choose which
+ modules to run by specifying the `--modules` command line parameter.
+ * Error handling is nil, so if anything goes wrong it will exit with an exception.
+
+-- creds.sh
+
+```
+#!/bin/bash
+# Secret data
+export SSO_LOGIN=daniel.manrique@xxxxxxxxxxxxx
+export SSO_PASS=...
+export GDOCS_CLIENT_ID=190311...
+export GDOCS_CLIENT_SECRET=eq4...
+export LANDSCAPE_API_KEY=6IE...
+export LANDSCAPE_API_URI=https://landscape.canonical.com/api/
+export LANDSCAPE_API_SECRET=blZ...
+
+# Salesforce for querying pending GDPR requests
+export SALESFORCE_OAUTH_CLIENT_ID=3MVG99gP.VbJma8UaoXtQk6gwCaLnw4t6se.OYSumLdJtJOpafuTWJyKdKhQWRoaeYI1qBX668PZ17Rw8VU4H
+# Ping Najam for SALESFORCE_OAUTH_CLIENT_SECRET
+export SALESFORCE_OAUTH_CLIENT_SECRET=C6ED................................
+```
+
+
+Additionally, a credentials.json file from Google with the OAuth application's key/secret
+is needed in the current directory.
+
+
+
+https://github.com/sarumont/py-trello
+https://github.com/googleapis/google-api-python-client
+https://developers.google.com/docs/api/reference/rest/
+https://pypi.org/project/simple-salesforce/
+>>>>>>> README
diff --git a/board.py b/board.py
index ed8fe18..db3ec4b 100644
--- a/board.py
+++ b/board.py
@@ -13,7 +13,8 @@ class TrelloWrangler:
def __init__(self):
self.trello = TrelloClient(
api_key=os.environ.get("TRELLO_API_KEY"),
- api_secret=os.environ.get("TRELLO_API_SECRET"),
+ token=os.environ.get("TRELLO_API_OAUTH_TOKEN"),
+ token_secret=os.environ.get("TRELLO_API_OAUTH_SECRET"),
)
def get_cards_for(self, board_id, list_id, username):
diff --git a/launchpad.py b/launchpad.py
new file mode 100644
index 0000000..fca29f8
--- /dev/null
+++ b/launchpad.py
@@ -0,0 +1,54 @@
+# Get an identifier (email address) and return either a gdpr report as json,
+# or something indicating there was no such user.
+
+import os
+import sys
+import textwrap
+
+from launchpadlib.errors import HTTPError
+from launchpadlib.launchpad import Launchpad
+
+
+def auth_failed():  # Prints an authentication-failure message and exits; passed as credential_save_failed to Launchpad.login_with below.
+ print("Unable to authenticate with Launchpad. Please try again later")
+ sys.exit(1)  # terminates the process with exit status 1
+
+
+class LaunchpadWrangler:  # Looks up Launchpad user data by email address and formats it as a plain-text report.
+ def __init__(self):  # Logs in to Launchpad and sets the two identifiers read by sar-reports.py.
+ self.lp = Launchpad.login_with(
+ "gdpro", "production", version="devel", credential_save_failed=auth_failed
+ )
+ self.service_identifier = "Launchpad"  # checked by process_for in sar-reports.py
+ self.salesforce_owner = "GDPR - Launchpad"  # passed to get_tasks_for by main in sar-reports.py
+
+ def gdpr_report_for(self, usr_mail):  # Returns report text for usr_mail; "" for a falsy address or a "no data held" status.
+ if not usr_mail:
+ return ""
+ response = self.lp.people.getUserData(email=usr_mail)  # indexed by "status" and iterated with .items() below
+ report = [  # header lines that precede the per-key data lines
+ f"Launchpad data for {usr_mail}",
+ "",
+ "This document may be sent to the person requesting their data, as a GDPR Subject Access Request or otherwise.",
+ "",
+ "Data held",
+ ]
+ if response["status"] == "no data held":
+ return ""
+ elif response["status"] in (
+ "account only; no other data",
+ "account with data",
+ ):
+ report += [f"{k.title()}:\t{v}" for k, v in response.items()]  # one "Key:<TAB>value" line per response item
+ return "\n".join(report)  # NOTE(review): indentation is lost in this archive; if this return sits outside the elif, any other status yields a header-only report - confirm
+
+
+def main():  # Builds a LaunchpadWrangler and prints the report for one hard-coded address.
+ ls = LaunchpadWrangler()
+ # print(ls.gdpr_report_for(os.environ.get("SSO_LOGIN")))
+ # print(ls.gdpr_report_for("lolo"))
+ print(ls.gdpr_report_for("imsp01@xxxxxxxxx"))
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+ main()
diff --git a/requirements.txt b/requirements.txt
index 9a65199..2647eb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,11 @@
-py-trello==0.16.0
+py-trello
beautifulsoup4
mechanize
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
landscape-api-py3
+launchpadlib
simple_salesforce
oauth2client
pytest
diff --git a/salesforce_oauth.py b/salesforce_oauth.py
index 33455ff..488ed8b 100644
--- a/salesforce_oauth.py
+++ b/salesforce_oauth.py
@@ -1,3 +1,4 @@
+<<<<<<< salesforce_oauth.py
# (c) 2022 Canonical, LTD
# This module implements Oauth web client flow to get Salesforce
# session credentials, to be used with SimpleSalesforce.
@@ -9,11 +10,15 @@
import json
import os
import time
+=======
+import os
+>>>>>>> salesforce_oauth.py
from urllib import parse
from threading import Thread
from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
import requests
+<<<<<<< salesforce_oauth.py
from simple_salesforce import Salesforce
SALESFORCE_OAUTH_BASE = (
@@ -113,6 +118,17 @@ def introspect_oauth_code(token):
print("Sending introspect request")
response = requests.post(parse.urlunparse(token_url), data=payload)
return response.json()
+=======
+
+SALESFORCE_OAUTH_BASE = ("https", "canonical.my.salesforce.com", "/services/oauth2/", "", "", "")
+
+
+def get_session():
+ generate_oauth_authorize_url()
+ code = run_server_and_return_response()
+ creds = validate_oauth_code(code)
+ return (creds["access_token"], creds["instance_url"])
+>>>>>>> salesforce_oauth.py
def validate_oauth_code(code):
@@ -123,16 +139,21 @@ def validate_oauth_code(code):
"client_secret": os.environ.get("SALESFORCE_OAUTH_CLIENT_SECRET"),
"redirect_uri": "http://localhost:9999",
}
+<<<<<<< salesforce_oauth.py
token_url = (
SALESFORCE_OAUTH_BASE[:2]
+ (parse.urljoin("/services/oauth2/", "token"),)
+ ("", "", "")
)
+=======
+ token_url = SALESFORCE_OAUTH_BASE[:2] + (parse.urljoin("/services/oauth2/", "token"), ) + ("", "", "")
+>>>>>>> salesforce_oauth.py
print("Sending validate request")
response = requests.post(parse.urlunparse(token_url), data=payload)
return response.json()
+<<<<<<< salesforce_oauth.py
def refresh_oauth_code(refresh_token):
payload = {
"grant_type": "refresh_token",
@@ -150,6 +171,8 @@ def refresh_oauth_code(refresh_token):
return response.json()
+=======
+>>>>>>> salesforce_oauth.py
def generate_oauth_authorize_url():
payload = {
"client_id": os.environ.get("SALESFORCE_OAUTH_CLIENT_ID"),
@@ -158,6 +181,7 @@ def generate_oauth_authorize_url():
"prompt": "login",
"scope": "chatter_api api refresh_token offline_access",
}
+<<<<<<< salesforce_oauth.py
print(
"Please open the following URL in your browser and "
"complete the OAuth procedure:"
@@ -167,6 +191,10 @@ def generate_oauth_authorize_url():
+ (parse.urljoin("/services/oauth2/", "authorize"),)
+ ("", parse.urlencode(payload), "")
)
+=======
+ print("Please open the following URL in your browser and complete the OAuth procedure:")
+ authorize_url = SALESFORCE_OAUTH_BASE[:2] + (parse.urljoin("/services/oauth2/", "authorize"), ) + ("", parse.urlencode(payload), "")
+>>>>>>> salesforce_oauth.py
print(parse.urlunparse(authorize_url))
print("We will continue once the OAuth procedure has completed.")
@@ -175,11 +203,16 @@ def run_server_and_return_response():
class SalesforceOAuthHandler(BaseHTTPRequestHandler):
def _set_response(self):
self.send_response(200)
+<<<<<<< salesforce_oauth.py
self.send_header("Content-type", "text/html")
+=======
+ self.send_header('Content-type', 'text/html')
+>>>>>>> salesforce_oauth.py
self.end_headers()
def do_GET(self):
self._set_response()
+<<<<<<< salesforce_oauth.py
self.server.response = parse.parse_qs(
parse.urlparse(self.path).query
)["code"][0]
@@ -190,6 +223,15 @@ def run_server_and_return_response():
self.server.shutdown()
server = ThreadingHTTPServer(("localhost", 9999), SalesforceOAuthHandler)
+=======
+ self.server.response = parse.parse_qs(parse.urlparse(self.path).query)["code"][0]
+ self.wfile.write("OAuth process completed. Please go back to the CLI and complete the GDPR processing there.".encode("utf-8"))
+ self.server.shutdown()
+
+ server = ThreadingHTTPServer(
+ ("localhost", 9999), SalesforceOAuthHandler
+ )
+>>>>>>> salesforce_oauth.py
def serve_forever():
server.RequestHandlerClass.server = server
@@ -200,6 +242,7 @@ def run_server_and_return_response():
server_thread.join()
server.server_close()
return server.response
+<<<<<<< salesforce_oauth.py
def main():
@@ -215,3 +258,5 @@ def main():
if __name__ == "__main__":
main()
+=======
+>>>>>>> salesforce_oauth.py
diff --git a/sar-reports.py b/sar-reports.py
new file mode 100755
index 0000000..26d4483
--- /dev/null
+++ b/sar-reports.py
@@ -0,0 +1,182 @@
+<<<<<<< sar-reports.py
+=======
+#!/usr/bin/env python
+# 1- get list of pending tasks from Salesforce
+# 2- for each task, get e-mail custom field and gdoc folder attachment
+# 3- hit SSO api for data for that email
+# 4- if found
+# A- create gdoc with that data
+# B- set sharing to "off - only specific" and set nice name
+# C- drop in gdoc folder.
+# 5- Mark task (by its ID from step 2) as "Completed"
+# Steps 3-5 are also done for Landscape; there should be separate tasks
+# for that, so we really only just walk the tasks.
+
+import os
+import re
+import sys
+import select
+import argparse
+from urllib.parse import urlparse
+
+from sso import SSOWrangler
+from gdocs import GDocWrangler
+from landscape import LandscapeWrangler
+from launchpad import LaunchpadWrangler
+from salesforce import SalesforceWrangler
+from pprint import pprint
+
+
+MODULE_LOOKUP = {  # maps each --modules name to the wrangler class that main() instantiates
+ "sso": SSOWrangler,
+ "landscape": LandscapeWrangler,
+ "launchpad": LaunchpadWrangler,
+}
+
+
+def interruptible_wait(message, wait_for):
+ """Wait for a determined but interruptible amount of time.
+
+ Print the message, and wait for wait_for seconds, with
+ a running countdown. If ENTER is pressed, immediately continue
+ (i.e. wait is over, do the thing). If Ctrl-C is pressed,
+ entirely exit the program (cancel everything).
+ """
+ print(message)
+ for sec in range(wait_for, 0, -1):  # counts down from wait_for to 1
+ print(f"\r{sec:3} seconds left", end="")  # "\r" with end="" rewrites the same terminal line
+ try:
+ i, o, e = select.select([sys.stdin], [], [], 1)  # waits up to 1 second for stdin to become readable
+ except KeyboardInterrupt:
+ print("Aborted!")
+ raise SystemExit(1)
+ if i:  # non-empty when stdin has input ready
+ print("")
+ return sys.stdin.readline().strip()  # returns the entered line with surrounding whitespace removed
+ print("")
+ return None  # reached when no input arrived
+
+
+def process_task(sfwrangler, task_id, service, link_to_doc):  # Adds a chatter message to the task: the report link, or a "No data held" note when link_to_doc is None.
+ if link_to_doc is not None:
+ sfwrangler.add_chatter_for_task(
+ task_id, f"{service} report in {link_to_doc}"
+ )
+ else:
+ sfwrangler.add_chatter_for_task(task_id, f"No data held for {service}")
+
+
+def clean_email(email):  # Returns email with surrounding whitespace stripped, then any trailing "." characters removed.
+ # Clean up common noise usually seen in email addresses
+ return email.strip().rstrip(".")
+
+
+def parse_emails(emailstring):  # Returns a list of cleaned addresses, split on the first separator ("," then "/") that yields more than one piece.
+ # possible separators = , /
+ # assume consistent separator
+ for sep in (",", "/"):
+ addresses = emailstring.split(sep)
+ if len(addresses) > 1:
+ return [clean_email(em) for em in addresses]
+ return [clean_email(emailstring)]  # no separator produced a split: treat the whole string as one address
+
+
+def process_for(task, module, salesforcewrangler, gdocw):  # Looks up each email of a task with module, creates a gdoc per non-empty report, and posts chatter; returns False when the task is skipped.
+ if module.service_identifier not in ("Launchpad", "Landscape", "SSO"):
+ print("Bad service identifier from module?")
+ return False
+ print(f"About to process {task['email']} on {module.service_identifier}")
+ if not task["gfolder"]:  # a Google folder URL is needed to place the report document
+ print(
+ "This task has no google folder, check the folder URL in the task."
+ " I will skip."
+ )
+ return False
+ wait_time = int(os.environ.get("GDPR_SAR_WAIT_TIME", 10))  # defaults to 10 when the variable is unset
+ interruptible_wait(
+ f"Auto-processing in {wait_time} secs, "
+ "ENTER to do now, CTRl-C to abort all.",
+ 60,  # NOTE(review): the literal 60 is passed as wait_for although the message reports wait_time - confirm which is intended
+ )
+ # parse multiple emails here
+ emails = parse_emails(task["email"])
+ results = []  # NOTE(review): nothing in the visible code appends to this list
+ for email in emails:
+ print(f"Processing email {email}")
+ report = module.gdpr_report_for(email)
+ link_to_doc = None  # stays None when the report is empty, which process_task reports as no data held
+ if report:
+ # Use urlparse to properly remove query parameters, scheme and
+ # netloc about which we don't really care.
+ folder_chunks = urlparse(task["gfolder"])
+ # the folder ID is the last component of the path.
+ folder_id = folder_chunks.path.split("/")[-1]
+ print(f"Folder ID is {folder_id}")
+ print(report)
+ # TODO: Maybe give each module a method with which to extract a
+ # suitable title from a report, and use that instead of this ugly
+ # conditional.
+ if module.service_identifier == "Landscape":
+ title = report.split("\n")[0]  # first line of the report
+ elif module.service_identifier == "SSO":
+ title_lines = [
+ l
+ for l in report.split("\n")
+ if re.match("^Account: .+$", l)
+ ]
+ if title_lines:
+ title = title_lines[0]  # first "Account: ..." line
+ else:
+ title = email + " (Account has no name)"
+ elif module.service_identifier == "Launchpad":
+ title = task["legal_record_number"]  # NOTE(review): there is no else branch; title is only bound for the three identifiers checked at the top
+ print(title)
+ print("*" * 50)
+ print("Creating gdoc")
+ link_to_doc = gdocw.create_doc_in(folder_id, title, report)
+ print("Processing task")
+ process_task(
+ salesforcewrangler,
+ task["task_id"],
+ module.service_identifier,
+ link_to_doc,
+ )
+ print(f"Done for {email}")
+ print(f"Done {module.service_identifier} for {email}")
+ return all(results)  # NOTE(review): all([]) is True, so this returns True whenever results stays empty
+
+
+def main():  # Parses --modules, then for each selected module fetches its Salesforce tasks and processes them one by one.
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--modules", nargs="*", default=["sso", "landscape"])  # "launchpad" is not in the default and must be requested explicitly
+ args = parser.parse_args()
+ salesforcewrangler = SalesforceWrangler()
+ gdocw = GDocWrangler()
+ modules = [MODULE_LOOKUP[_]() for _ in args.modules]  # a name missing from MODULE_LOOKUP raises KeyError here
+
+ for module in modules:
+ print(f"Now processing {module.service_identifier}")
+ tasks = salesforcewrangler.get_tasks_for(
+ module.salesforce_owner, "request"
+ )
+ pprint(tasks)
+ for task in tasks:
+ # Process with module right now
+ # print(f"Would process with module {module}")
+ task_processed = process_for(task, module, salesforcewrangler, gdocw)
+ if task_processed:  # only tasks that process_for reports as truthy are marked complete
+ print(
+ f"Marking task complete for {module.service_identifier}",
+ )
+ salesforcewrangler.set_task_complete(task["task_id"])
+ else:
+ print(
+ "Card skipped, please review above for possible "
+ "reasons and redo"
+ )
+ return True
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+ main()
+>>>>>>> sar-reports.py