Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
actions-user committed Feb 15, 2024
1 parent 18f9085 commit e4eab2f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 31 deletions.
6 changes: 3 additions & 3 deletions docs/config.html
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ <h1 class="title">Module <code>mimir.config</code></h1>
"Dump data to cache? Exits program after dumping"
    load_from_cache: Optional[bool] = False
    """Load data from cache?"""
    load_from_hf: Optional[bool] = False
    load_from_hf: Optional[bool] = True
    """Load data from HuggingFace?"""
    blackbox_attacks: Optional[List[str]] = field(
        default_factory=lambda: None
Expand Down Expand Up @@ -352,7 +352,7 @@ <h3>Class variables</h3>
</dd>
<dt id="mimir.config.ExperimentConfig"><code class="flex name class">
<span>class <span class="ident">ExperimentConfig</span></span>
<span>(</span><span>experiment_name: str, base_model: str, dataset_member: str, dataset_nonmember: str, output_name: str = None, dataset_nonmember_other_sources: Optional[List[str]] = &lt;factory&gt;, pretokenized: Optional[bool] = False, revision: Optional[str] = None, presampled_dataset_member: Optional[str] = None, presampled_dataset_nonmember: Optional[str] = None, token_frequency_map: Optional[str] = None, dataset_key: Optional[str] = None, specific_source: Optional[str] = None, full_doc: Optional[bool] = False, max_substrs: Optional[int] = 20, dump_cache: Optional[bool] = False, load_from_cache: Optional[bool] = False, load_from_hf: Optional[bool] = False, blackbox_attacks: Optional[List[str]] = &lt;factory&gt;, tokenization_attack: Optional[bool] = False, quantile_attack: Optional[bool] = False, n_samples: Optional[int] = 200, max_tokens: Optional[int] = 512, max_data: Optional[int] = 5000, min_words: Optional[int] = 100, max_words: Optional[int] = 200, max_words_cutoff: Optional[bool] = True, batch_size: Optional[int] = 50, chunk_size: Optional[int] = 20, scoring_model_name: Optional[str] = None, top_k: Optional[int] = 40, do_top_k: Optional[bool] = False, top_p: Optional[float] = 0.96, do_top_p: Optional[bool] = False, pre_perturb_pct: Optional[float] = 0.0, pre_perturb_span_length: Optional[int] = 5, tok_by_tok: Optional[bool] = False, fpr_list: Optional[List[float]] = &lt;factory&gt;, random_seed: Optional[int] = 0, ref_config: Optional[<a title="mimir.config.ReferenceConfig" href="#mimir.config.ReferenceConfig">ReferenceConfig</a>] = None, neighborhood_config: Optional[<a title="mimir.config.NeighborhoodConfig" href="#mimir.config.NeighborhoodConfig">NeighborhoodConfig</a>] = None, env_config: Optional[<a title="mimir.config.EnvironmentConfig" href="#mimir.config.EnvironmentConfig">EnvironmentConfig</a>] = None, openai_config: Optional[<a title="mimir.config.OpenAIConfig" href="#mimir.config.OpenAIConfig">OpenAIConfig</a>] = None)</span>
<span>(</span><span>experiment_name: str, base_model: str, dataset_member: str, dataset_nonmember: str, output_name: str = None, dataset_nonmember_other_sources: Optional[List[str]] = &lt;factory&gt;, pretokenized: Optional[bool] = False, revision: Optional[str] = None, presampled_dataset_member: Optional[str] = None, presampled_dataset_nonmember: Optional[str] = None, token_frequency_map: Optional[str] = None, dataset_key: Optional[str] = None, specific_source: Optional[str] = None, full_doc: Optional[bool] = False, max_substrs: Optional[int] = 20, dump_cache: Optional[bool] = False, load_from_cache: Optional[bool] = False, load_from_hf: Optional[bool] = True, blackbox_attacks: Optional[List[str]] = &lt;factory&gt;, tokenization_attack: Optional[bool] = False, quantile_attack: Optional[bool] = False, n_samples: Optional[int] = 200, max_tokens: Optional[int] = 512, max_data: Optional[int] = 5000, min_words: Optional[int] = 100, max_words: Optional[int] = 200, max_words_cutoff: Optional[bool] = True, batch_size: Optional[int] = 50, chunk_size: Optional[int] = 20, scoring_model_name: Optional[str] = None, top_k: Optional[int] = 40, do_top_k: Optional[bool] = False, top_p: Optional[float] = 0.96, do_top_p: Optional[bool] = False, pre_perturb_pct: Optional[float] = 0.0, pre_perturb_span_length: Optional[int] = 5, tok_by_tok: Optional[bool] = False, fpr_list: Optional[List[float]] = &lt;factory&gt;, random_seed: Optional[int] = 0, ref_config: Optional[<a title="mimir.config.ReferenceConfig" href="#mimir.config.ReferenceConfig">ReferenceConfig</a>] = None, neighborhood_config: Optional[<a title="mimir.config.NeighborhoodConfig" href="#mimir.config.NeighborhoodConfig">NeighborhoodConfig</a>] = None, env_config: Optional[<a title="mimir.config.EnvironmentConfig" href="#mimir.config.EnvironmentConfig">EnvironmentConfig</a>] = None, openai_config: Optional[<a title="mimir.config.OpenAIConfig" href="#mimir.config.OpenAIConfig">OpenAIConfig</a>] = None)</span>
</code></dt>
<dd>
<div class="desc"><p>Config for attacks</p></div>
Expand Down Expand Up @@ -403,7 +403,7 @@ <h3>Class variables</h3>
"Dump data to cache? Exits program after dumping"
    load_from_cache: Optional[bool] = False
    """Load data from cache?"""
    load_from_hf: Optional[bool] = False
    load_from_hf: Optional[bool] = True
    """Load data from HuggingFace?"""
    blackbox_attacks: Optional[List[str]] = field(
        default_factory=lambda: None
Expand Down
42 changes: 14 additions & 28 deletions docs/custom_datasets.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ <h1 class="title">Module <code>mimir.custom_datasets</code></h1>

DATASETS = ['writing', 'english', 'german', 'pubmed']

SOURCES_UPLOADED = [
    "arxiv",
    "dm_mathematics",
    "github",
    "hackernews",
    "pile_cc",
    "pubmed_central",
    "wikipedia_(en)",
    "full_pile",
    "c4",
    "temporal_arxiv",
    "temporal_wiki"
]


def load_pubmed(cache_dir):
data = datasets.load_dataset('pubmed_qa', 'pqa_labeled', split='train', cache_dir=cache_dir)
Expand Down Expand Up @@ -70,20 +84,6 @@ <h1 class="title">Module <code>mimir.custom_datasets</code></h1>
if not filename.startswith("the_pile"):
    raise ValueError(f"HuggingFace data only available for The Pile.")

SOURCES_UPLOADED = [
&#34;arxiv&#34;,
&#34;dm_mathematics&#34;,
&#34;github&#34;,
&#34;hackernews&#34;,
&#34;pile_cc&#34;,
&#34;pubmed_central&#34;,
&#34;wikipedia_(en)&#34;,
&#34;full_pile&#34;,
&#34;c4&#34;,
&#34;temporal_arxiv&#34;,
&#34;temporal_wiki&#34;
]

for source in SOURCES_UPLOADED:
    # Got a match
    if source in filename and filename.startswith(f"the_pile_{source}"):
Expand Down Expand Up @@ -292,20 +292,6 @@ <h2 class="section-title" id="header-functions">Functions</h2>
if not filename.startswith("the_pile"):
    raise ValueError(f"HuggingFace data only available for The Pile.")

SOURCES_UPLOADED = [
    "arxiv",
    "dm_mathematics",
    "github",
    "hackernews",
    "pile_cc",
    "pubmed_central",
    "wikipedia_(en)",
    "full_pile",
    "c4",
    "temporal_arxiv",
    "temporal_wiki"
]

for source in SOURCES_UPLOADED:
    # Got a match
    if source in filename and filename.startswith(f"the_pile_{source}"):
Expand Down

0 comments on commit e4eab2f

Please sign in to comment.