-
Notifications
You must be signed in to change notification settings - Fork 0
/
imdbq
executable file
·218 lines (172 loc) · 5.82 KB
/
imdbq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/bin/bash
# This script looks up movies on IMDb, and displays information about
# them.
# Usage: imdb.sh "movie title (year)"
# (The year is optional, and only recommended for more accurate search
# results. The paranthesis around (year) are required for proper
# parsing.)
usage () {
printf '\n%s\n\n' "Usage: $(basename "$0") \"movie title (year)\""
exit
}
if [[ $# -eq 0 ]]; then
usage
fi
# This creates a function called 'uriencode', which will translate
# the special characters in any string to be URL friendly. This will be
# used in the 'imdb' function.
uriencode () {
curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" <<<"$@" | sed -E 's/..(.*).../\1/'
}
# Creates a function called 'time_calc', which will translate seconds
# into the hh:mm:ss format.
time_calc () {
s="$1"
m=0
h=0
# While $s (seconds) is equal to (or greater than) 60, clear the $s
# variable and add 1 to the $m (minutes) variable.
while [[ $s -ge 60 ]]; do
m=$(( m + 1 ))
s=$(( s - 60 ))
done
# While $m (minutes) is equal to (or greater than) 60, clear the $m
# variable and add 1 to the $h (hours) variable.
while [[ $m -ge 60 ]]; do
h=$(( h + 1 ))
m=$(( m - 60 ))
done
# While $h (hours) is equal to 100 (or greater than), clear the $h
# variable.
while [[ $h -ge 100 ]]; do
h=$(( h - 100 ))
done
printf '%02d:%02d:%02d' "$h" "$m" "$s"
}
# This creates a function called 'imdb', which will look up the movie
# name on IMDb.
# https://www.imdb.com/search/title/
# https://www.imdb.com/interfaces/
imdb () {
if [[ $# -eq 0 ]]; then
return 1
fi
mapfile -d' ' -t term < <(sed -E 's/[[:blank:]]+/ /g' <<<"$@")
term[-1]="${term[-1]%$'\n'}"
y_regex='^\(([0-9]{4})\)$'
id_regex='\/title\/(tt[0-9]+)'
title_regex1='\,\"originalTitleText\":'
title_regex2='\"text\":\"(.*)\"\,\"__typename\":\"TitleText\"'
year_regex1='\,\"releaseYear\":'
year_regex2='\"year\":([0-9]{4})\,\"endYear\":.*\,\"__typename\":\"YearRange\"'
plot_regex1='\"plotText\":'
plot_regex2='\"plainText\":\"(.*)\"\,\"__typename\":\"Markdown\"'
rating_regex1='\,\"ratingsSummary\":'
rating_regex2='\"aggregateRating\":(.*)\,\"voteCount\":.*\,\"__typename\":\"RatingsSummary\"'
genre_regex1='\"genres\":\['
genre_regex2='\"text\":\"(.*)\"\,\"id\":\".*\"\,\"__typename\":\"Genre\"'
actor_regex1='\,\"actor\":\['
actor_regex2='\"@type\":\"Person\",\"url\":\".*\"\,\"name\":\"(.*)\"'
director_regex1='\]\,\"director\":\['
director_regex2='\"@type\":\"Person\",\"url\":\".*\"\,\"name\":\"(.*)\"'
runtime_regex1='\,\"runtime\":'
runtime_regex2='\"seconds\":(.*)\,\"__typename\":\"Runtime\"'
agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
# This function gets a URL using cURL.
get_page () {
curl --location --user-agent "$agent" --retry 10 --retry-delay 10 --connect-timeout 10 --silent "$1" 2>&-
}
# This function runs the JSON regexes and decides which JSON type is a
# list and which isn't.
get_list () {
declare string
declare -a list
declare -A lists
lists=(['genre']=1 ['actor']=1 ['director']=1)
regex_list='^,$'
z=$(( z + 1 ))
# If current JSON type is not a list, match the regex and return from
# this function.
if [[ -z ${lists[${json_type}]} ]]; then
if [[ ${tmp_array[${z}]} =~ ${!json_regex2_ref} ]]; then
eval "${json_type}"=\""${BASH_REMATCH[1]}"\"
fi
return
fi
# This loop parses JSON lists.
while [[ ${tmp_array[${z}]} =~ ${!json_regex2_ref} ]]; do
list+=("${BASH_REMATCH[1]}")
z=$(( z + 1 ))
if [[ ${tmp_array[${z}]} =~ $regex_list ]]; then
z=$(( z + 1 ))
else
z=$(( z - 1 ))
break
fi
done
string=$(printf '%s, ' "${list[@]}")
string="${string%, }"
eval "${json_type}"=\""${string}"\"
}
if [[ ${term[-1]} =~ $y_regex ]]; then
y="${BASH_REMATCH[1]}"
unset -v term[-1]
fi
t=$(uriencode "${term[@]}")
# Sets the type of IMDb search results to include.
# All currently available types:
# feature,tv_movie,tv_series,tv_episode,tv_special,tv_miniseries,
# documentary,video_game,short,video,tv_short,podcast_series,
# podcast_episode,music_video
type='feature,tv_movie,tv_special,documentary,video'
# If the $y variable is empty, that means the year is unknown, hence we
# will need to use slightly different URLs, when searching for the
# movie.
if [[ -z $y ]]; then
url_tmp="https://www.imdb.com/search/title/?title=${t}&title_type=${type}&view=simple"
else
url_tmp="https://www.imdb.com/search/title/?title=${t}&title_type=${type}&release_date=${y},${y}&view=simple"
fi
mapfile -t id_array < <(get_page "$url_tmp" | grep -Eo "$id_regex" | sed -E "s/${id_regex}/\1/")
id="${id_array[0]}"
if [[ -z $id ]]; then
return 1
fi
url="https://www.imdb.com/title/${id}/"
# Translate {} characters to newlines so we can parse the JSON data.
# I came to the conclusion that this is the most simple, reliable and
# future-proof way to get the movie information. It's possible to add
# more regex:es to the for loop below, to get additional information.
# Excluding lines that are longer than 500 characters, to make it
# slightly faster.
mapfile -t tmp_array < <(get_page "$url" | tr '{}' '\n' | grep -Ev -e '.{500}' -e '^[[:blank:]]*$')
declare -A json_types
json_types=(['title']=1 ['year']=1 ['plot']=1 ['rating']=1 ['genre']=1 ['actor']=1 ['director']=1 ['runtime']=1)
for (( z = 0; z < ${#tmp_array[@]}; z++ )); do
if [[ ${#json_types[@]} -eq 0 ]]; then
break
fi
for json_type in "${!json_types[@]}"; do
json_regex1_ref="${json_type}_regex1"
json_regex2_ref="${json_type}_regex2"
if [[ ${tmp_array[${z}]} =~ ${!json_regex1_ref} ]]; then
get_list
unset -v json_types["${json_type}"]
break
fi
done
done
runtime=$(time_calc "$runtime")
cat <<IMDB
${title} (${year})
${url}
Rating: ${rating}
Genre(s): ${genre}
Runtime: ${runtime}
Plot summary:
${plot}
Actor(s): ${actor}
Director(s): ${director}
IMDB
}
imdb "$@"