← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~ines-almeida/gdpro:update-login-url into gdpro:master

 

Ines Almeida has proposed merging ~ines-almeida/gdpro:update-login-url into gdpro:master.

Commit message:
Update OAuth URL to login directly to the canonical domain

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ines-almeida/gdpro/+git/gdpro/+merge/448998

With this change, clicking the login link logs in directly to the canonical domain, so the user no longer has to select "Use Custom Domain", enter "canonical" in the textbox, and select "Continue".
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ines-almeida/gdpro:update-login-url into gdpro:master.
diff --git a/README b/README
new file mode 100644
index 0000000..c2c22ce
--- /dev/null
+++ b/README
@@ -0,0 +1,69 @@
+<<<<<<< README
+=======
+This script automates the GDPR process for Store and Launchpad services.
+
+Currently supports SSO, Landscape and Launchpad.
+
+The list of GDPR reports/deletions is obtained from Salesforce.
+
+ * Looks at pending tasks on the list, there will be one per supported service
+   (Landscape, SSO, Launchpad)
+ * For each task, extracts the email(s) and:
+    * Does a lookup by email and gets the report.
+    * If there was a report, then it creates a google doc with the report text
+      and proper sharing permissions and adds it to the Google Drive folder
+      retrieved from the Salesforce "Legal" associated to the task.
+    * A message ("chatter") is added to the "Legal" record indicating either
+      data found or no data held.
+    * Task is marked as completed.
+
+
+Do Salesforce interaction, get needed lookup data
+Pluggable, configurable lookup modules for diverse systems
+Google doc creation / adding to card / folder (one document per system)
+
+
+== TO RUN IT ==
+
+ * Install the requirements.txt and activate the venv
+ * Create a creds.sh file with some credentials for the supported services,
+   Salesforce and Trello and some IDs for the Trello board, list and usernames
+   of the humans you're automating.
+ * Obtain a credentials.json Oauth2 file from Google API with the Drive v3 and
+   Docs v1 APIs enabled.
+ * Source creds.sh
+ * Run sar-reports.py; it'll ask before each card, so they can be verified manually.
+ * By default `sar-reports.py` will run for SSO and Landscape. You can choose which
+   modules to run by specifying the `--modules` command line parameter.
+ * Error handling is nil, so if anything goes wrong, it'll raise an exception and exit.
+
+-- creds.sh
+
+```
+#!/bin/bash
+# Secret data
+export SSO_LOGIN=daniel.manrique@xxxxxxxxxxxxx
+export SSO_PASS=...
+export GDOCS_CLIENT_ID=190311...
+export GDOCS_CLIENT_SECRET=eq4...
+export LANDSCAPE_API_KEY=6IE...
+export LANDSCAPE_API_URI=https://landscape.canonical.com/api/
+export LANDSCAPE_API_SECRET=blZ...
+
+# Salesforce for querying pending GDPR requests
+export SALESFORCE_OAUTH_CLIENT_ID=3MVG99gP.VbJma8UaoXtQk6gwCaLnw4t6se.OYSumLdJtJOpafuTWJyKdKhQWRoaeYI1qBX668PZ17Rw8VU4H
+# Ping Najam for SALESFORCE_OAUTH_CLIENT_SECRET
+export SALESFORCE_OAUTH_CLIENT_SECRET=C6ED................................
+```
+
+
+Additionally a credentials.json file from google with the oauth application's key/secret
+is needed in the current directory.
+
+
+
+https://github.com/sarumont/py-trello
+https://github.com/googleapis/google-api-python-client
+https://developers.google.com/docs/api/reference/rest/
+https://pypi.org/project/simple-salesforce/
+>>>>>>> README
diff --git a/board.py b/board.py
index ed8fe18..db3ec4b 100644
--- a/board.py
+++ b/board.py
@@ -13,7 +13,8 @@ class TrelloWrangler:
     def __init__(self):
         self.trello = TrelloClient(
             api_key=os.environ.get("TRELLO_API_KEY"),
-            api_secret=os.environ.get("TRELLO_API_SECRET"),
+            token=os.environ.get("TRELLO_API_OAUTH_TOKEN"),
+            token_secret=os.environ.get("TRELLO_API_OAUTH_SECRET"),
         )
 
     def get_cards_for(self, board_id, list_id, username):
diff --git a/launchpad.py b/launchpad.py
new file mode 100644
index 0000000..fca29f8
--- /dev/null
+++ b/launchpad.py
@@ -0,0 +1,54 @@
+# Get an identifier (email address) and return either a gdpr report as json,
+# or something indicating there was no such user.
+
+import os
+import sys
+import textwrap
+
+from launchpadlib.errors import HTTPError
+from launchpadlib.launchpad import Launchpad
+
+
+def auth_failed():
+    print("Unable to authenticate with Launchpad. Please try again later")
+    sys.exit(1)
+
+
+class LaunchpadWrangler:
+    def __init__(self):
+        self.lp = Launchpad.login_with(
+            "gdpro", "production", version="devel", credential_save_failed=auth_failed
+        )
+        self.service_identifier = "Launchpad"
+        self.salesforce_owner = "GDPR - Launchpad"
+
+    def gdpr_report_for(self, usr_mail):
+        if not usr_mail:
+            return ""
+        response = self.lp.people.getUserData(email=usr_mail)
+        report = [
+            f"Launchpad data for {usr_mail}",
+            "",
+            "This document may be sent to the person requesting their data, as a GDPR Subject Access Request or otherwise.",
+            "",
+            "Data held",
+        ]
+        if response["status"] == "no data held":
+            return ""
+        elif response["status"] in (
+            "account only; no other data",
+            "account with data",
+        ):
+            report += [f"{k.title()}:\t{v}" for k, v in response.items()]
+        return "\n".join(report)
+
+
+def main():
+    ls = LaunchpadWrangler()
+    # print(ls.gdpr_report_for(os.environ.get("SSO_LOGIN")))
+    # print(ls.gdpr_report_for("lolo"))
+    print(ls.gdpr_report_for("imsp01@xxxxxxxxx"))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index 9a65199..2647eb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,11 @@
-py-trello==0.16.0
+py-trello
 beautifulsoup4
 mechanize
 google-api-python-client
 google-auth-httplib2
 google-auth-oauthlib
 landscape-api-py3
+launchpadlib
 simple_salesforce
 oauth2client
 pytest
diff --git a/salesforce_oauth.py b/salesforce_oauth.py
index 33455ff..488ed8b 100644
--- a/salesforce_oauth.py
+++ b/salesforce_oauth.py
@@ -1,3 +1,4 @@
+<<<<<<< salesforce_oauth.py
 # (c) 2022 Canonical, LTD
 # This module implements Oauth web client flow to get Salesforce
 # session credentials, to be used with SimpleSalesforce.
@@ -9,11 +10,15 @@
 import json
 import os
 import time
+=======
+import os
+>>>>>>> salesforce_oauth.py
 from urllib import parse
 from threading import Thread
 from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
 
 import requests
+<<<<<<< salesforce_oauth.py
 from simple_salesforce import Salesforce
 
 SALESFORCE_OAUTH_BASE = (
@@ -113,6 +118,17 @@ def introspect_oauth_code(token):
     print("Sending introspect request")
     response = requests.post(parse.urlunparse(token_url), data=payload)
     return response.json()
+=======
+
+SALESFORCE_OAUTH_BASE = ("https", "canonical.my.salesforce.com", "/services/oauth2/", "", "", "")
+
+
+def get_session():
+    generate_oauth_authorize_url()
+    code = run_server_and_return_response()
+    creds = validate_oauth_code(code)
+    return (creds["access_token"], creds["instance_url"])
+>>>>>>> salesforce_oauth.py
 
 
 def validate_oauth_code(code):
@@ -123,16 +139,21 @@ def validate_oauth_code(code):
         "client_secret": os.environ.get("SALESFORCE_OAUTH_CLIENT_SECRET"),
         "redirect_uri": "http://localhost:9999",
     }
+<<<<<<< salesforce_oauth.py
     token_url = (
         SALESFORCE_OAUTH_BASE[:2]
         + (parse.urljoin("/services/oauth2/", "token"),)
         + ("", "", "")
     )
+=======
+    token_url = SALESFORCE_OAUTH_BASE[:2] + (parse.urljoin("/services/oauth2/", "token"), ) + ("", "", "")
+>>>>>>> salesforce_oauth.py
     print("Sending validate request")
     response = requests.post(parse.urlunparse(token_url), data=payload)
     return response.json()
 
 
+<<<<<<< salesforce_oauth.py
 def refresh_oauth_code(refresh_token):
     payload = {
         "grant_type": "refresh_token",
@@ -150,6 +171,8 @@ def refresh_oauth_code(refresh_token):
     return response.json()
 
 
+=======
+>>>>>>> salesforce_oauth.py
 def generate_oauth_authorize_url():
     payload = {
         "client_id": os.environ.get("SALESFORCE_OAUTH_CLIENT_ID"),
@@ -158,6 +181,7 @@ def generate_oauth_authorize_url():
         "prompt": "login",
         "scope": "chatter_api api refresh_token offline_access",
     }
+<<<<<<< salesforce_oauth.py
     print(
         "Please open the following URL in your browser and "
         "complete the OAuth procedure:"
@@ -167,6 +191,10 @@ def generate_oauth_authorize_url():
         + (parse.urljoin("/services/oauth2/", "authorize"),)
         + ("", parse.urlencode(payload), "")
     )
+=======
+    print("Please open the following URL in your browser and complete the OAuth procedure:")
+    authorize_url = SALESFORCE_OAUTH_BASE[:2] + (parse.urljoin("/services/oauth2/", "authorize"), ) + ("", parse.urlencode(payload), "")
+>>>>>>> salesforce_oauth.py
     print(parse.urlunparse(authorize_url))
     print("We will continue once the OAuth procedure has completed.")
 
@@ -175,11 +203,16 @@ def run_server_and_return_response():
     class SalesforceOAuthHandler(BaseHTTPRequestHandler):
         def _set_response(self):
             self.send_response(200)
+<<<<<<< salesforce_oauth.py
             self.send_header("Content-type", "text/html")
+=======
+            self.send_header('Content-type', 'text/html')
+>>>>>>> salesforce_oauth.py
             self.end_headers()
 
         def do_GET(self):
             self._set_response()
+<<<<<<< salesforce_oauth.py
             self.server.response = parse.parse_qs(
                 parse.urlparse(self.path).query
             )["code"][0]
@@ -190,6 +223,15 @@ def run_server_and_return_response():
             self.server.shutdown()
 
     server = ThreadingHTTPServer(("localhost", 9999), SalesforceOAuthHandler)
+=======
+            self.server.response = parse.parse_qs(parse.urlparse(self.path).query)["code"][0]
+            self.wfile.write("OAuth process completed. Please go back to the CLI and complete the GDPR processing there.".encode("utf-8"))
+            self.server.shutdown()
+
+    server = ThreadingHTTPServer(
+        ("localhost", 9999), SalesforceOAuthHandler
+    )
+>>>>>>> salesforce_oauth.py
 
     def serve_forever():
         server.RequestHandlerClass.server = server
@@ -200,6 +242,7 @@ def run_server_and_return_response():
     server_thread.join()
     server.server_close()
     return server.response
+<<<<<<< salesforce_oauth.py
 
 
 def main():
@@ -215,3 +258,5 @@ def main():
 
 if __name__ == "__main__":
     main()
+=======
+>>>>>>> salesforce_oauth.py
diff --git a/sar-reports.py b/sar-reports.py
new file mode 100755
index 0000000..26d4483
--- /dev/null
+++ b/sar-reports.py
@@ -0,0 +1,182 @@
+<<<<<<< sar-reports.py
+=======
+#!/usr/bin/env python
+# 1- get list of pending tasks from Salesforce
+# 2- for each task, get e-mail custom field and gdoc folder attachment
+# 3- hit SSO api for data for that email
+# 4- if found
+#   A- create gdoc with that data
+#   B- set sharing to "off - only specific" and set nice name
+#   C- drop in gdoc folder.
+# 5- Mark task (by its ID from step 2) as "Completed"
+# Steps 3-5 are also done for Landscape; there should be separate tasks
+# for that, so we really only just walk the tasks.
+
+import os
+import re
+import sys
+import select
+import argparse
+from urllib.parse import urlparse
+
+from sso import SSOWrangler
+from gdocs import GDocWrangler
+from landscape import LandscapeWrangler
+from launchpad import LaunchpadWrangler
+from salesforce import SalesforceWrangler
+from pprint import pprint
+
+
+MODULE_LOOKUP = {
+    "sso": SSOWrangler,
+    "landscape": LandscapeWrangler,
+    "launchpad": LaunchpadWrangler,
+}
+
+
+def interruptible_wait(message, wait_for):
+    """Wait for a determined but interruptible amount of time.
+
+    Print the message, and wait for wait_for seconds, with
+    a running countdown. If ENTER is pressed, immediately continue
+    (i.e. wait is over, do the thing). If Ctrl-C is pressed,
+    entirely exit the program (cancel everything).
+    """
+    print(message)
+    for sec in range(wait_for, 0, -1):
+        print(f"\r{sec:3} seconds left", end="")
+        try:
+            i, o, e = select.select([sys.stdin], [], [], 1)
+        except KeyboardInterrupt:
+            print("Aborted!")
+            raise SystemExit(1)
+        if i:
+            print("")
+            return sys.stdin.readline().strip()
+    print("")
+    return None
+
+
+def process_task(sfwrangler, task_id, service, link_to_doc):
+    if link_to_doc is not None:
+        sfwrangler.add_chatter_for_task(
+            task_id, f"{service} report in {link_to_doc}"
+        )
+    else:
+        sfwrangler.add_chatter_for_task(task_id, f"No data held for {service}")
+
+
+def clean_email(email):
+    # Clean common crap usually seen in email addresses
+    return email.strip().rstrip(".")
+
+
+def parse_emails(emailstring):
+    # possible separators = , /
+    # assume consistent separator
+    for sep in (",", "/"):
+        addresses = emailstring.split(sep)
+        if len(addresses) > 1:
+            return [clean_email(em) for em in addresses]
+    return [clean_email(emailstring)]
+
+
+def process_for(task, module, salesforcewrangler, gdocw):
+    if module.service_identifier not in ("Launchpad", "Landscape", "SSO"):
+        print("Bad service identifier from module?")
+        return False
+    print(f"About to process {task['email']} on {module.service_identifier}")
+    if not task["gfolder"]:
+        print(
+            "This task has no google folder, check the folder URL in the task."
+            " I will skip."
+        )
+        return False
+    wait_time = int(os.environ.get("GDPR_SAR_WAIT_TIME", 10))
+    interruptible_wait(
+        f"Auto-processing in {wait_time} secs, "
+        "ENTER to do now, CTRl-C to abort all.",
+        60,
+    )
+    # parse multiple emails here
+    emails = parse_emails(task["email"])
+    results = []
+    for email in emails:
+        print(f"Processing email {email}")
+        report = module.gdpr_report_for(email)
+        link_to_doc = None
+        if report:
+            # Use urlparse to properly remove query parameters, scheme and
+            # netloc about which we don't really care.
+            folder_chunks = urlparse(task["gfolder"])
+            # the folder ID is the last component of the path.
+            folder_id = folder_chunks.path.split("/")[-1]
+            print(f"Folder ID is {folder_id}")
+            print(report)
+            # TODO: Maybe give each module a method with which to extract a
+            # suitable title from a report, and use that instead of this ugly
+            # conditional.
+            if module.service_identifier == "Landscape":
+                title = report.split("\n")[0]
+            elif module.service_identifier == "SSO":
+                title_lines = [
+                    l
+                    for l in report.split("\n")
+                    if re.match("^Account: .+$", l)
+                ]
+                if title_lines:
+                    title = title_lines[0]
+                else:
+                    title = email + " (Account has no name)"
+            elif module.service_identifier == "Launchpad":
+                title = task["legal_record_number"]
+            print(title)
+            print("*" * 50)
+            print("Creating gdoc")
+            link_to_doc = gdocw.create_doc_in(folder_id, title, report)
+        print("Processing task")
+        process_task(
+            salesforcewrangler,
+            task["task_id"],
+            module.service_identifier,
+            link_to_doc,
+        )
+        print(f"Done for {email}")
+        print(f"Done {module.service_identifier} for {email}")
+    return all(results)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--modules", nargs="*", default=["sso", "landscape"])
+    args = parser.parse_args()
+    salesforcewrangler = SalesforceWrangler()
+    gdocw = GDocWrangler()
+    modules = [MODULE_LOOKUP[_]() for _ in args.modules]
+
+    for module in modules:
+        print(f"Now processing {module.service_identifier}")
+        tasks = salesforcewrangler.get_tasks_for(
+            module.salesforce_owner, "request"
+        )
+        pprint(tasks)
+        for task in tasks:
+            # Process with module right now
+            # print(f"Would process with module {module}")
+            task_processed = process_for(task, module, salesforcewrangler, gdocw)
+            if task_processed:
+                print(
+                    f"Marking task complete for {module.service_identifier}",
+                )
+                salesforcewrangler.set_task_complete(task["task_id"])
+            else:
+                print(
+                    "Card skipped, please review above for possible "
+                    "reasons and redo"
+                )
+    return True
+
+
+if __name__ == "__main__":
+    main()
+>>>>>>> sar-reports.py