Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] OpenID connect client library - eyeing toward deploying keycloak #299

Open
wants to merge 26 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9d528f7
OpenID connect client library - eyeing toward deploying keycloak
ntai-arxiv Jul 26, 2024
4765e51
Work dump
ntai-arxiv Aug 6, 2024
c1d4037
Add JWT token making interface
ntai-arxiv Aug 6, 2024
7ccb479
User claim dictionary slightly smaller. Apprently, nginx default head…
ntai-arxiv Aug 9, 2024
0a76ef3
Redo - simplify
ntai-arxiv Aug 9, 2024
e4d4bfd
Update Role names - less cryptic.
ntai-arxiv Aug 9, 2024
03df0e9
Add client secret support.
ntai-arxiv Aug 11, 2024
5b6c747
logout URL is no longer a proprety, it's now a function and you can p…
ntai-arxiv Aug 20, 2024
de6dbcd
oops.
ntai-arxiv Aug 20, 2024
1f0b695
naming is hard.
ntai-arxiv Aug 20, 2024
ecfc74c
Redo the oidc and user claims - make cookie/claims smaller.
ntai-arxiv Aug 21, 2024
4934e21
:(
ntai-arxiv Aug 21, 2024
49a18a8
Start of bridging the oauth2 cookie to legacy.
ntai-arxiv Aug 21, 2024
cc0597a
some progerss made.
ntai-arxiv Aug 23, 2024
2ee7f64
1 - Add "aud" checking to pass for oidc_idp.py
ntai-arxiv Aug 27, 2024
190a236
Ues all but "aud" token verity.
ntai-arxiv Aug 30, 2024
f23b810
Support refreshing access token. The claims now includes the refresh …
ntai-arxiv Sep 3, 2024
ca287e1
" " space was a bad choice for the delimiter.
ntai-arxiv Sep 4, 2024
4c6b9ac
Finish off implementing the token refresh. The refresh is untested fo…
ntai-arxiv Sep 6, 2024
80f658c
Nit fix, and use user_id rather than email for setting up Tapir. user…
ntai-arxiv Sep 13, 2024
44ed197
Merge branch 'develop' into ntai/openid-connect-step-1
ntai-arxiv Sep 13, 2024
df06d5a
Remove the "transaction" thing. It broke. Also fix a stupidity.
ntai-arxiv Sep 13, 2024
bfb93c6
Change the token format and make it future proof by a version prefix.
ntai-arxiv Sep 17, 2024
a86117c
Include the token expiration timestamp in the return value of encode_…
ntai-arxiv Sep 23, 2024
1ca5c01
refresh token, the function now only need the refresh token only rath…
ntai-arxiv Sep 23, 2024
683177a
Try not using access token (usu payload) instead for the user claims.…
ntai-arxiv Sep 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,6 @@ dist/
.sass-cache/
.pytest_cache/

fastly_hourly_stats.ini
fastly_hourly_stats.ini

*.db-journal
Empty file added arxiv/auth/openid/__init__.py
Empty file.
91 changes: 91 additions & 0 deletions arxiv/auth/openid/oidc_idp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
OpenID connect IdP client
"""
from typing import List
import requests
import jwt
from jwt.algorithms import RSAAlgorithm, RSAPublicKey


class ArxivOidcIdpClient:
"""arXiv OpenID Connect IdP client
This is implemented for Keycloak. If the APIs different, you may want to subclass/adjust.
"""

server_url: str
client_id: str
client_secret: str | None
realm: str
redirect_uri: str #
scope: List[str] # it's okay to be empty. Keycloak should be configured to provide scopes.
_server_certs: dict # Cache for the IdP certs

def __init__(self, redirect_uri: str,
server_url: str = "https://openid.arxiv.org",
realm: str ="arxiv",
client_id: str = "arxiv-user",
scope: List[str] = [],
client_secret: str | None = None):
self.server_url = server_url
self.realm = realm
self.client_id = client_id
self.client_secret = client_secret
self.redirect_uri = redirect_uri
self.scope = scope if scope else ["openid", "profile", "email"]
self._server_certs = {}
pass

@property
def oidc(self) -> str:
return f'{self.server_url}/realms/{self.realm}/protocol/openid-connect'


@property
def auth_url(self) -> str:
return self.oidc + '/auth'

@property
def token_url(self) -> str:
return self.oidc + '/token'

@property
def certs_url(self) -> str:
return self.oidc + '/certs'

@property
def login_url(self) -> str:
scope = "&" + ",".join(self.scope) if self.scope else ""
return f'{self.auth_url}?client_id={self.client_id}&redirect_uri={self.redirect_uri}&response_type=code{scope}'


@property
def server_certs(self) -> dict:
if not self._server_certs:
certs_response = requests.get(self.certs_url)
self._server_certs = certs_response.json()
return self._server_certs


def get_public_key(self, kid: str) -> RSAPublicKey | None:
for key in self.server_certs['keys']:
if key['kid'] == kid:
pkey = RSAAlgorithm.from_jwk(key)
if isinstance(pkey, RSAPublicKey):
return pkey
return None

def validate_token(self, token: str) -> dict | None:
try:
unverified_header = jwt.get_unverified_header(token)
kid = unverified_header['kid'] # key id
algorithm = unverified_header['alg'] # key algo
public_key = self.get_public_key(kid)
if public_key is None:
return None
decoded_token: dict = jwt.decode(token, public_key, algorithms=[algorithm])
return dict(decoded_token)
except jwt.ExpiredSignatureError:
return None
except jwt.InvalidTokenError:
return None
# not reached
Empty file.
44 changes: 44 additions & 0 deletions arxiv/auth/openid/tests/test_keycloak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import subprocess
import pytest
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

@pytest.fixture(scope="module")
def web_driver() -> webdriver.Chrome:
# Set up the Selenium WebDriver
_web_driver = webdriver.Chrome() # Ensure you have ChromeDriver installed and in PATH
_web_driver.implicitly_wait(10) # Wait for elements to be ready
yield _web_driver
_web_driver.quit() # Close the browser window after tests

@pytest.fixture(scope="module")
def toy_flask():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless there is some specific reason to run flask in a sub process, consider running it as described here https://flask.palletsprojects.com/en/3.0.x/testing/

In the past we've written tests like this to use a client with cookies, get the form, post the form with login and then get a protected page.

Now I'm seeing that the web_driver is requesting to flask running in the subprocess. This actually seems fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are spinning up a local flask in a subprocess, can we spin up a local keycloak too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keycloak (KC) can run locally as a fixture, once what I want to test is more concrete. I needed to see what comes back from KC while changing the settings of KC - such as KC's notion of groups, roles, attributes you can define to the user. For example, I haven't been able to get the user's group in the oauth2 reply.

To make this a fixture, I need to run KC docker, and run a set of REST API calls to set up the settings (create the realm, client ID, client callback, etc.)

This PR is more for not losing work and exploring what I can do with KC. Right now, this is a test bench, and at some point, I will turn this into real regression test.

flask_app = subprocess.Popen(["python3", "-m", "arxiv.auth.openid.tests.toy_flask"])
time.sleep(5) # Give the server time to start
yield flask_app
# Stop the Flask app
flask_app.terminate()
flask_app.wait()

def test_login(web_driver, toy_flask):
web_driver.get("http://localhost:5000/login") # URL of your Flask app's login route

# Simulate user login on the IdP login page
# Replace the following selectors with the actual ones from your IdP login form
username_field = web_driver.find_element(By.ID, "username") # Example selector
password_field = web_driver.find_element(By.ID, "password") # Example selector
login_button = web_driver.find_element(By.ID, "kc-login") # Example selector

# Enter credentials
username_field.send_keys("testuser")
password_field.send_keys("testpassword")
login_button.click()

# Wait for the redirect back to the Flask app
time.sleep(5)

# Check if the login was successful by verifying the presence of a specific element or text
web_driver.get("http://localhost:5000/protected") # URL of your protected route
body_text = web_driver.find_element(By.TAG_NAME, "body").text
assert "Token is valid" in body_text
91 changes: 91 additions & 0 deletions arxiv/auth/openid/tests/toy_flask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os

from flask import Flask, redirect, request, session, url_for, jsonify
from ..oidc_idp import ArxivOidcIdpClient

import requests

KEYCLOAK_SERVER_URL = os.environ.get('KEYCLOAK_SERVER_URL', 'localhost')
#REALM_NAME = 'arxiv'
#CLIENT_ID = 'arxiv-user'
#CLIENT_SECRET = 'your-client-secret'
#REDIRECT_URI = 'http://localhost:5000/callback'

class ToyFlask(Flask):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.secret_key = 'secret' # Replace with a secure secret key
self.idp = ArxivOidcIdpClient("http://localhost:5000/callback",
server_url=KEYCLOAK_SERVER_URL)

app = ToyFlask(__name__)


@app.route('/')
def home():
session.clear()
return redirect('/login')

@app.route('/login')
def login():
return redirect(app.idp.login_url)


@app.route('/callback')
def callback():
# Get the authorization code from the callback URL
code = request.args.get('code')

# Exchange the authorization code for an access token
token_response = requests.post(
app.idp.token_url,
data={
'grant_type': 'authorization_code',
'code': code,
'redirect_uri': app.idp.redirect_uri,
'client_id': app.idp.client_id,
}
)

if token_response.status_code != 200:
session.clear()
return 'Something is wrong'

# Parse the token response
token_json = token_response.json()
access_token = token_json.get('access_token')
refresh_token = token_json.get('refresh_token')

# Store tokens in session (for demonstration purposes)
session['access_token'] = access_token
session['refresh_token'] = refresh_token

print(app.idp.validate_token(access_token))
return 'Login successful!'


@app.route('/logout')
def logout():
# Clear the session and redirect to home
session.clear()
return redirect(url_for('home'))


@app.route('/protected')
def protected():
access_token = session.get('access_token')
if not access_token:
return redirect(app.idp.login_url)

decoded_token = app.idp.validate_token(access_token)
if not decoded_token:
return jsonify({'error': 'Invalid token'}), 401

return jsonify({'message': 'Token is valid', 'token': decoded_token})


def create_app():
return app

if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0', port=5000)
Loading
Loading