Skip to content

Commit

Permalink
Merge pull request #390 from ajwheeler/exomol_linelists
Browse files Browse the repository at this point in the history
parsing exomol linelists
  • Loading branch information
ajwheeler authored Jan 2, 2025
2 parents b9d035f + 54bab06 commit 38990bd
Show file tree
Hide file tree
Showing 11 changed files with 918 additions and 15 deletions.
4 changes: 4 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.41.1"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Expand All @@ -23,12 +24,14 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
Trapz = "592b5752-818d-11e9-1e9a-2b8ca4a44cd1"

[compat]
CSV = "0.8, 0.9, 0.10"
Compat = "4.10 - 4"
DSP = "0.7, 0.8"
DataFrames = "1.7.0"
DiffResults = "1"
FastGaussQuadrature = "0.4, 0.5, 1"
ForwardDiff = "0.10"
Expand All @@ -45,5 +48,6 @@ SparseArrays = "1.7 - 1"
SpecialFunctions = "1.4, 2"
StaticArrays = "1"
Statistics = "1"
TableOperations = "1.2.0"
Trapz = "2"
julia = "1.7 - 1"
11 changes: 6 additions & 5 deletions docs/src/API.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
This page documents the Korg API for users of the package. Low-level functions that only people
digging into the internals of Korg will be interested in can be found in the
This page documents the Korg API for users of the package. Low-level functions that only people
digging into the internals of Korg will be interested in can be found in the
[Developer documentation](@ref).

# [Top-level functions](@id API)
If you are synthesize a spectrum Korg, these are the functions you will call.
If you are synthesize a spectrum Korg, these are the functions you will call.
These functions are exported, so if you do `using Korg`, you can call them unqualified (i.e.
`synthesize` instead of `Korg.synthesize`).
`synthesize` instead of `Korg.synthesize`).

```@docs
synthesize
read_linelist
load_ExoMol_linelist
read_model_atmosphere
interpolate_marcs
format_A_X
Expand All @@ -30,7 +31,7 @@ Korg.Fit.ews_to_stellar_parameters
```

# Secondary functions
You don't need use these to synthesize spectra, but they might be relevant depending on what you are
You don't need use these to synthesize spectra, but they might be relevant depending on what you are
doing.

## Postprocessing
Expand Down
2 changes: 1 addition & 1 deletion src/Korg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ include("qfactors.jl") # formalism to compute theoretical RV pre
@compat public get_APOGEE_DR17_linelist, get_GALAH_DR3_linelist, get_GES_linelist, save_linelist,
Fit, apply_LSF, compute_LSF_matrix, air_to_vacuum, vacuum_to_air, line_profile,
blackbody, prune_linelist, merge_close_lines, Line
export synthesize, read_linelist, read_model_atmosphere, interpolate_marcs,
export synthesize, read_linelist, load_ExoMol_linelist, read_model_atmosphere, interpolate_marcs,
format_A_X
end # module
2 changes: 1 addition & 1 deletion src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ const eV_to_cgs = 1.602e-12 # ergs per eV

const kboltz_eV = 8.617333262145e-5 #eV/K
const hplanck_eV = 4.135667696e-15 #eV*s
const RydbergH_eV = 13.598287264 #eV
const RydbergH_eV = 13.598287264 #eV
const Rydberg_eV = 13.605693122994 #eV 2018 CODATA via wikipedia
100 changes: 98 additions & 2 deletions src/linelist.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using CSV, HDF5, LazyArtifacts
using CSV, HDF5, LazyArtifacts, TableOperations
using DataFrames: DataFrame, leftjoin, leftjoin!, rename!
using Pkg.Artifacts: @artifact_str

#This type represents an individual line.
Expand Down Expand Up @@ -165,6 +166,101 @@ function approximate_gammas(wl, species, E_lower; ionization_energies=ionization
γstark, log_γvdW
end

"""
load_ExoMol_linelist(specs, states_file, transitions_file, upper_wavelength, lower_wavelength)
Load a linelist from ExoMol. Returns a vector of [`Line`](@ref)s, the same as [`read_linelist`](@ref).
# Arguments
- `spec`: the species, i.e. the molecule that the linelist is for
- `states_file`: the path to the ExoMol states file
- `transitions_file`: the path to the ExoMol, transitions file
- `upper_wavelength`: the upper limit of the wavelength range to load (Å)
- `lower_wavelength`: the lower limit of the wavelength range to load (Å)
# Keyword Arguments
- `line_strength_cutoff`: the cutoff for the line strength (default: -15) used to filter the
linelist. See [`approximate_line_strength`](@ref) for more information.
- `T_line_strength`: the temperature (K) at which to evaluate the line strength (default: 3500.0)
!!! warning
This functionality is in beta.
"""
function load_ExoMol_linelist(spec, states_file, transitions_file, ll, ul;
line_strength_cutoff=-15, T_line_strength=3500.0)
if spec isa AbstractString
spec = Species(spec)
end
@info "Loading ExoMol linelist from $states_file and $transitions_file. This functionality is experimental. Please report any issues."

if !occursin("states", states_file) || !occursin("trans", transitions_file)
@info "The states and transitions files, $states_file and $transitions_file, don't contain the string 'states' or 'trans', respectively. You may have mixed them up."
end

# These contortions allow us to read only the first N columns, without parsing the rest.
# This is (probably) faster, and more memory efficient. But also the ExoMol files sometimes
# have various extra columns. Hopefully we can cound on the first three being consistent.
raw_transitions = CSV.File(transitions_file; delim=" ", ignorerepeated=true, header=false) |>
TableOperations.select(:Column1, :Column2, :Column3) |>
DataFrame
rename!(raw_transitions, :Column1 => :id_upper, :Column2 => :id_lower, :Column3 => :A)
states = CSV.File(states_file; delim=" ", ignorerepeated=true, header=false) |>
TableOperations.select(:Column1, :Column2, :Column3) |>
DataFrame
rename!(states, :Column1 => :id, :Column2 => :E_wavenumber, :Column3 => :g)

# join the transitions and states tables on the id column to get the upper and lower level info
transitions = leftjoin(raw_transitions, states; on=:id_upper => :id)
rename!(transitions, :E_wavenumber => :wavenumber_upper, :g => :g_upper)
leftjoin!(transitions, states; on=:id_lower => :id)
rename!(transitions, :E_wavenumber => :wavenumber_lower, :g => :g_lower)

# if the column is missing, the join didn't find a matching state
if any(ismissing.(transitions.g_lower)) || any(ismissing.(transitions.g_upper))
throw(ArgumentError("Some of the transitions in $transitions_file can't be mapped to states in $states_file"))
end

# Gray 4th ed, eq 11.12 (page 214) but with an extra factor of 1/4π for stradian vs unit sphere
# difference of 1e16 is due to Å vs cm
prefactor = (Korg.electron_mass_cgs * Korg.c_cgs) / (8π^2 * Korg.electron_charge_cgs^2)

transitions.wavenumber = transitions.wavenumber_upper .- transitions.wavenumber_lower
transitions.f = @. transitions.A * prefactor * transitions.g_upper /
(transitions.g_lower * transitions.wavenumber^2)
transitions.log_gf = log10.(transitions.g_lower .* transitions.f)

isotopic_correction = log10(prod(maximum(last.(collect(values(Korg.isotopic_abundances[Z]))))
for Z in get_atoms(spec.formula)))
@info "Applying isotopic correction of $isotopic_correction to all log gf values. (Assuming most abundant isotope for all atoms.)"
transitions.log_gf .+= isotopic_correction

transitions.E_lower = @. Korg.hplanck_eV * transitions.wavenumber_lower * Korg.c_cgs
transitions.wavelength = 1.0 ./ transitions.wavenumber

region = transitions[ll.<transitions.wavelength.*1e8.<ul, :]

lines = map(eachrow(region)) do row
Korg.Line(row.wavelength, row.log_gf, spec, row.E_lower)
end |> reverse
lines = lines[approximate_line_strength.(lines, T_line_strength).>line_strength_cutoff]
sort!(lines; by=l -> l.wl)
lines
end

"""
approximate_line_strength(line::Line, T)
Approximate the line strength (`log10(gfλ) - θχ`, in arbitrary units) of a line at temperature
`T` (K). This can be used to very quickly filter a linelist, and is used to filter very large
molecular linelists from ExoMol (see [`load_ExoMol_linelist`](@ref)).
"""
function approximate_line_strength(line::Line, T)
line.log_gf + log10(line.wl) - log10(ℯ) * line.E_lower / (Korg.kboltz_eV * T)
end

"""
read_linelist(filename; format="vald", isotopic_abundances=Korg.isotopic_abundances)
Expand Down Expand Up @@ -205,7 +301,7 @@ a dict mapping atomic number to a dict mapping from atomic weight to abundance.
Be warned that for linelists which are pre-scaled for isotopic abundance, the estimation of
radiative broadening from log(gf) is not accurate.
See also [`save_linelist`](@ref).
See also: [`load_ExoMol_linelist`](@ref), [`save_linelist`](@ref).
"""
function read_linelist(fname::String;
format=endswith(fname, ".h5") ? "korg" : "vald",
Expand Down
38 changes: 32 additions & 6 deletions src/species.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const MAX_ATOMS_PER_MOLECULE = 6
Represents an atom or molecule, irrespective of its charge.
"""
struct Formula
# Support molecules with up to MAX_ATOMS_PER_MOLECULE atoms.
# Support molecules with up to MAX_ATOMS_PER_MOLECULE atoms.
# Unlike tuples, SVectors support sorting, which is why we use them here.
atoms::SVector{6,UInt8}

Expand All @@ -30,7 +30,7 @@ struct Formula
"""
Formula(code::String)
Construct a Formula from an encoded string form. This can be a MOOG-style numeric code,
Construct a Formula from an encoded string form. This can be a MOOG-style numeric code,
i.e. "0801" for OH, or an atomic or molecular symbol, i.e. "FeH", "Li", or "C2".
"""
function Formula(code::AbstractString)
Expand Down Expand Up @@ -117,7 +117,33 @@ function n_atoms(f::Formula)
end

# it's important that this produces something parsable by the constructor
Base.show(io::IO, f::Formula) = print(io, *([atomic_symbols[i] for i in f.atoms if i != 0]...))
function Base.show(io::IO, f::Formula)
i = findfirst(f.atoms .!= 0)
@assert !isnothing(i) # formula with no atoms should be allowed by the constructor

a = f.atoms[i]
n_of_this_atom = 1
i += 1
while i <= length(f.atoms)
if f.atoms[i] == a
n_of_this_atom += 1
else
formula_show_helper(io, a, n_of_this_atom)
a = f.atoms[i]
n_of_this_atom = 1
end
i += 1
end
formula_show_helper(io, a, n_of_this_atom)
end

function formula_show_helper(io::IO, atom, n_of_this_atom)
if n_of_this_atom == 1
print(io, atomic_symbols[atom])
else
print(io, atomic_symbols[atom], n_of_this_atom)
end
end

"""
ismolecule(f::Formula)
Expand Down Expand Up @@ -183,19 +209,19 @@ representing the species. `code` can be either a string or a float.
function Species(code::AbstractString)
code = strip(code, ['0', ' ']) # leading 0s are safe to remove

# if the species ends in "+" or "-", convert it to a numerical charge. Remember, the ionization
# if the species ends in "+" or "-", convert it to a numerical charge. Remember, the ionization
# number is the charge+1, so for us "H 0" is H⁻ and "H 2" in H⁺.
if code[end] == '+'
code = code[1:end-1] * " 2"
elseif code[end] == '-'
code = code[1:end-1] * " 0"
end

# these are the valid separators between the atomic number part of a species code and the
# these are the valid separators between the atomic number part of a species code and the
# charge-containing part. For example, "01.01" parses the same as "01 01", but "01,01" fails.
# Or, "C 2" parses the same at "C.2". "-"s can't be separators as they can be minus signs.
toks = split(code, [' ', '.', '_'])
# this allows for leading, trailing, and repeat separators. "01.01" parses the same as
# this allows for leading, trailing, and repeat separators. "01.01" parses the same as
# ".01..01.".
filter!(!=(""), toks)

Expand Down
Loading

0 comments on commit 38990bd

Please sign in to comment.