-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharxiv_title
executable file
·55 lines (43 loc) · 1.57 KB
/
arxiv_title
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/env python
import requests
import xml.etree.ElementTree as ET
import sys, os
def make_filename(title):
    """Turn a paper title into a filesystem-friendly ``.pdf`` filename.

    Characters other than word characters, whitespace and hyphens are
    removed, and each run of whitespace becomes a single underscore.

    Args:
        title: The paper title to convert.

    Returns:
        The sanitized name ending in ``.pdf``. The complete name, extension
        included, is at most 255 characters long.
    """
    import re

    extension = ".pdf"
    max_length = 255

    # Drop punctuation and other special characters, then trim the ends.
    cleaned = re.sub(r'[^\w\s-]', '', title).strip()
    # Collapse every whitespace run (spaces, tabs, newlines) to one underscore.
    cleaned = re.sub(r'\s+', '_', cleaned)
    # Truncate the stem *before* appending the extension so that the final
    # name, not just the stem, respects the length limit.
    stem = cleaned[:max_length - len(extension)]
    return f"{stem}{extension}"
def get_paper_name(arxiv_id):
    """Look up the title of an arXiv paper through the arXiv export API.

    Args:
        arxiv_id: The arXiv identifier to query, e.g. ``'1707.08567'``.

    Returns:
        The paper title with runs of whitespace collapsed to single spaces,
        or ``None`` when the request fails, the server answers with a
        non-200 status, the response is not well-formed XML, or the feed
        contains no entry with a title.
    """
    # Construct the arXiv API URL
    arxiv_api_url = f'https://export.arxiv.org/api/query?id_list={arxiv_id}'

    try:
        # Without a timeout an unresponsive server would block forever.
        response = requests.get(arxiv_api_url, timeout=30)
    except requests.RequestException:
        # Connection problems are reported the same way as a bad status.
        return None
    if response.status_code != 200:
        return None

    try:
        # Parse the raw bytes so the XML parser applies the encoding
        # declared in the document itself.
        root = ET.fromstring(response.content)
    except ET.ParseError:
        return None

    # Atom elements live in this XML namespace.
    namespace = {'atom': 'http://www.w3.org/2005/Atom'}
    entry = root.find(".//atom:entry", namespaces=namespace)
    if entry is None:
        # The feed has no entry, i.e. nothing was found for this identifier.
        return None

    title = entry.find(".//atom:title", namespaces=namespace)
    if title is None or not title.text:
        return None

    # Long titles come back wrapped over several lines; join them into one.
    return ' '.join(title.text.split())
def extract_arxiv_id(path):
    """Derive an arXiv identifier from a bare identifier or a PDF path.

    Args:
        path: An identifier such as ``'1707.08567'`` or a path such as
            ``'papers/1707.08567.pdf'``.

    Returns:
        The final path component with one trailing ``.pdf`` removed.
    """
    name = os.path.basename(path)
    # str.strip('.pdf') removes any of the characters '.', 'p', 'd', 'f'
    # from both ends (turning 'draft.pdf' into 'raft'); cut only a real
    # '.pdf' suffix instead.
    if name.endswith('.pdf'):
        name = name[:-len('.pdf')]
    return name


def main():
    """Print the title and a suggested filename for the paper given in argv.

    Returns:
        ``2`` when no argument was supplied, otherwise ``None``.
    """
    if len(sys.argv) < 2:
        script = os.path.basename(sys.argv[0])
        print(f'Usage: {script} <arxiv-id-or-pdf-path>', file=sys.stderr)
        return 2

    arxiv_id = extract_arxiv_id(sys.argv[1])
    print(arxiv_id)

    paper_name = get_paper_name(arxiv_id)
    if paper_name:
        print(f'Paper Title: {paper_name}')
        print(make_filename(paper_name))
    else:
        print('Paper not found or API request failed.')
    return None


# Only hit the network when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(main())