diff --git a/CURATED_SET/curated_service/add_h2bv_march2024.ipynb b/CURATED_SET/curated_service/add_h2bv_march2024.ipynb
new file mode 100644
index 00000000..aa655522
--- /dev/null
+++ b/CURATED_SET/curated_service/add_h2bv_march2024.ipynb
@@ -0,0 +1,1209 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/mnt/scratch/l_singh/hdb/project_dir/histonedb/CURATED_SET\n"
+ ]
+ }
+ ],
+ "source": [
+ "%cd '..'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import inspect\n",
+ "import os\n",
+ "import re\n",
+ "import sys\n",
+ "\n",
+ "import pandas as pd\n",
+ "from Bio import Entrez, SeqIO\n",
+ "from curated_set_services import CuratedSet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((561, 16),\n",
+ " Index(['accession', 'type', 'variant_group', 'variant', 'doublet', 'gi',\n",
+ " 'ncbi_gene_id', 'hgnc_gene_name', 'taxonomy_id', 'organism', 'phylum',\n",
+ " 'class', 'taxonomy_group', 'info', 'references', 'sequence'],\n",
+ " dtype='object'))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build the curated set (its data is loaded from histones.csv).\n",
+ "# `curated_set` and the shorter `cs` are two names for the same object.\n",
+ "cs = curated_set = CuratedSet()\n",
+ "\n",
+ "cs.data.shape, cs.data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(cs.has_duplicates())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [accession, type, variant_group, variant, doublet, gi, ncbi_gene_id, hgnc_gene_name, taxonomy_id, organism, phylum, class, taxonomy_group, info, references, sequence]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"accession\"] == \"AAO24603.1\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " NP_505463.1 | \n",
+ " NP_505463.1 | \n",
+ " H2A | \n",
+ " cH2A | \n",
+ " cH2A_(Animals) | \n",
+ " | \n",
+ " 17562014 | \n",
+ " | \n",
+ " | \n",
+ " 6239 | \n",
+ " Caenorhabditis elegans | \n",
+ " Nematoda | \n",
+ " Chromadorea | \n",
+ " | \n",
+ " | \n",
+ " 26989147 22650316(?) | \n",
+ " MSGRGKGGKAKTGGKAKSRSSRAGLQFPVGRLHRILRKGNYAQRVG... | \n",
+ "
\n",
+ " \n",
+ " EEC09557.1 | \n",
+ " EEC09557.1 | \n",
+ " H2A | \n",
+ " cH2A | \n",
+ " cH2A_(Animals) | \n",
+ " | \n",
+ " 215500063 | \n",
+ " | \n",
+ " | \n",
+ " 6945 | \n",
+ " Ixodes scapularis | \n",
+ " Arthropoda | \n",
+ " Arachnida | \n",
+ " | \n",
+ " | \n",
+ " 26989147 22650316(?) | \n",
+ " MSGRGKGGKVKGKSKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAG... | \n",
+ "
\n",
+ " \n",
+ " NP_724343.1 | \n",
+ " NP_724343.1 | \n",
+ " H2A | \n",
+ " cH2A | \n",
+ " cH2A_(Animals) | \n",
+ " | \n",
+ " 24585673 | \n",
+ " | \n",
+ " | \n",
+ " 7227 | \n",
+ " Drosophila melanogaster | \n",
+ " Arthropoda | \n",
+ " Insecta | \n",
+ " | \n",
+ " | \n",
+ " 26989147 22650316(?) | \n",
+ " MSGRGKGGKVKGKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAG... | \n",
+ "
\n",
+ " \n",
+ " XP_001119899.1 | \n",
+ " XP_001119899.1 | \n",
+ " H2A | \n",
+ " cH2A | \n",
+ " cH2A_(Animals) | \n",
+ " | \n",
+ " 110764935 | \n",
+ " | \n",
+ " | \n",
+ " 7460 | \n",
+ " Apis mellifera | \n",
+ " Arthropoda | \n",
+ " Insecta | \n",
+ " | \n",
+ " | \n",
+ " 26989147 22650316(?) | \n",
+ " MSGRGKGGKAKAKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAG... | \n",
+ "
\n",
+ " \n",
+ " EDO48405.1 | \n",
+ " EDO48405.1 | \n",
+ " H2A | \n",
+ " cH2A | \n",
+ " cH2A_(Animals) | \n",
+ " | \n",
+ " 156227602 | \n",
+ " | \n",
+ " | \n",
+ " 45351 | \n",
+ " Nematostella vectensis | \n",
+ " Cnidaria | \n",
+ " Anthozoa | \n",
+ " | \n",
+ " | \n",
+ " 26989147 22650316(?) | \n",
+ " MSGRGKGKAKGTKSKTRSSRAGLQFPVGRIHRHLRKGNYAERVGAG... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant doublet \\\n",
+ "NP_505463.1 NP_505463.1 H2A cH2A cH2A_(Animals) \n",
+ "EEC09557.1 EEC09557.1 H2A cH2A cH2A_(Animals) \n",
+ "NP_724343.1 NP_724343.1 H2A cH2A cH2A_(Animals) \n",
+ "XP_001119899.1 XP_001119899.1 H2A cH2A cH2A_(Animals) \n",
+ "EDO48405.1 EDO48405.1 H2A cH2A cH2A_(Animals) \n",
+ "\n",
+ " gi ncbi_gene_id hgnc_gene_name taxonomy_id \\\n",
+ "NP_505463.1 17562014 6239 \n",
+ "EEC09557.1 215500063 6945 \n",
+ "NP_724343.1 24585673 7227 \n",
+ "XP_001119899.1 110764935 7460 \n",
+ "EDO48405.1 156227602 45351 \n",
+ "\n",
+ " organism phylum class \\\n",
+ "NP_505463.1 Caenorhabditis elegans Nematoda Chromadorea \n",
+ "EEC09557.1 Ixodes scapularis Arthropoda Arachnida \n",
+ "NP_724343.1 Drosophila melanogaster Arthropoda Insecta \n",
+ "XP_001119899.1 Apis mellifera Arthropoda Insecta \n",
+ "EDO48405.1 Nematostella vectensis Cnidaria Anthozoa \n",
+ "\n",
+ " taxonomy_group info references \\\n",
+ "NP_505463.1 26989147 22650316(?) \n",
+ "EEC09557.1 26989147 22650316(?) \n",
+ "NP_724343.1 26989147 22650316(?) \n",
+ "XP_001119899.1 26989147 22650316(?) \n",
+ "EDO48405.1 26989147 22650316(?) \n",
+ "\n",
+ " sequence \n",
+ "NP_505463.1 MSGRGKGGKAKTGGKAKSRSSRAGLQFPVGRLHRILRKGNYAQRVG... \n",
+ "EEC09557.1 MSGRGKGGKVKGKSKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAG... \n",
+ "NP_724343.1 MSGRGKGGKVKGKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAG... \n",
+ "XP_001119899.1 MSGRGKGGKAKAKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAG... \n",
+ "EDO48405.1 MSGRGKGKAKGTKSKTRSSRAGLQFPVGRIHRHLRKGNYAERVGAG... "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(561, 16)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "h2bv_accessions = [\n",
+ " \"AAO24603.1\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((1, 5),\n",
+ " Index(['accession', 'type', 'variant_group', 'variant', 'references'], dtype='object'))"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# One row per new accession; every row gets the same histone type, variant\n",
+ "# labels and reference value.\n",
+ "df = pd.DataFrame({\"accession\": h2bv_accessions}).assign(\n",
+ "    type=\"H2B\",\n",
+ "    variant_group=\"H2B.V\",\n",
+ "    variant=\"H2B.V\",\n",
+ "    references=\"16303849\",\n",
+ ")\n",
+ "# Index the rows by accession while keeping `accession` as a regular column.\n",
+ "df = df.set_index(\"accession\", drop=False)\n",
+ "df.shape, df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " references | \n",
+ "
\n",
+ " \n",
+ " accession | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AAO24603.1 | \n",
+ " AAO24603.1 | \n",
+ " H2B | \n",
+ " H2B.V | \n",
+ " H2B.V | \n",
+ " 16303849 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant references\n",
+ "accession \n",
+ "AAO24603.1 AAO24603.1 H2B H2B.V H2B.V 16303849"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((562, 16), [])"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Append only the accessions that are not in the curated set yet, so that\n",
+ "# re-running this cell cannot add the same row a second time.\n",
+ "new_rows = df[~df[\"accession\"].isin(cs.data[\"accession\"])]\n",
+ "# Columns that `df` does not provide come out of the concat as NaN; blank them to \"\".\n",
+ "cs.data = pd.concat([cs.data, new_rows]).fillna(\"\")\n",
+ "cs.data.shape, list(cs.has_duplicates())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " NP_005312.1 | \n",
+ " NP_005312.1 | \n",
+ " H1 | \n",
+ " H1.4 | \n",
+ " H1.4_(Homo_sapiens)__??? | \n",
+ " | \n",
+ " | \n",
+ " 3008.0 | \n",
+ " H1-4 | \n",
+ " 9606 | \n",
+ " Homo sapiens | \n",
+ " Chordata | \n",
+ " Mammalia | \n",
+ " Mammalia | \n",
+ " | \n",
+ " 26689747 | \n",
+ " MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELITK... | \n",
+ "
\n",
+ " \n",
+ " NP_005311.1 | \n",
+ " NP_005311.1 | \n",
+ " H1 | \n",
+ " H1.3 | \n",
+ " H1.3_(Homo_sapiens)__??? | \n",
+ " | \n",
+ " | \n",
+ " 3007.0 | \n",
+ " H1-3 | \n",
+ " 9606 | \n",
+ " Homo sapiens | \n",
+ " Chordata | \n",
+ " Mammalia | \n",
+ " Mammalia | \n",
+ " | \n",
+ " 26689747 | \n",
+ " MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELIT... | \n",
+ "
\n",
+ " \n",
+ " NP_006017.1 | \n",
+ " NP_006017.1 | \n",
+ " H1 | \n",
+ " H1.10 | \n",
+ " H1.10_(Homo_sapiens)__??? | \n",
+ " | \n",
+ " | \n",
+ " 8971.0 | \n",
+ " H1-10 | \n",
+ " 9606 | \n",
+ " Homo sapiens | \n",
+ " Chordata | \n",
+ " Mammalia | \n",
+ " Mammalia | \n",
+ " | \n",
+ " 26689747 | \n",
+ " MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQPG... | \n",
+ "
\n",
+ " \n",
+ " NP_005309.1 | \n",
+ " NP_005309.1 | \n",
+ " H1 | \n",
+ " H1.0 | \n",
+ " H1.0_(Homo_sapiens)__??? | \n",
+ " | \n",
+ " | \n",
+ " 3005.0 | \n",
+ " H1-0 | \n",
+ " 9606 | \n",
+ " Homo sapiens | \n",
+ " Chordata | \n",
+ " Mammalia | \n",
+ " Mammalia | \n",
+ " | \n",
+ " 26689747 | \n",
+ " MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSS... | \n",
+ "
\n",
+ " \n",
+ " AAO24603.1 | \n",
+ " AAO24603.1 | \n",
+ " H2B | \n",
+ " H2B.V | \n",
+ " H2B.V | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 16303849 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant \\\n",
+ "NP_005312.1 NP_005312.1 H1 H1.4 H1.4_(Homo_sapiens)__??? \n",
+ "NP_005311.1 NP_005311.1 H1 H1.3 H1.3_(Homo_sapiens)__??? \n",
+ "NP_006017.1 NP_006017.1 H1 H1.10 H1.10_(Homo_sapiens)__??? \n",
+ "NP_005309.1 NP_005309.1 H1 H1.0 H1.0_(Homo_sapiens)__??? \n",
+ "AAO24603.1 AAO24603.1 H2B H2B.V H2B.V \n",
+ "\n",
+ " doublet gi ncbi_gene_id hgnc_gene_name taxonomy_id organism \\\n",
+ "NP_005312.1 3008.0 H1-4 9606 Homo sapiens \n",
+ "NP_005311.1 3007.0 H1-3 9606 Homo sapiens \n",
+ "NP_006017.1 8971.0 H1-10 9606 Homo sapiens \n",
+ "NP_005309.1 3005.0 H1-0 9606 Homo sapiens \n",
+ "AAO24603.1 \n",
+ "\n",
+ " phylum class taxonomy_group info references \\\n",
+ "NP_005312.1 Chordata Mammalia Mammalia 26689747 \n",
+ "NP_005311.1 Chordata Mammalia Mammalia 26689747 \n",
+ "NP_006017.1 Chordata Mammalia Mammalia 26689747 \n",
+ "NP_005309.1 Chordata Mammalia Mammalia 26689747 \n",
+ "AAO24603.1 16303849 \n",
+ "\n",
+ " sequence \n",
+ "NP_005312.1 MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELITK... \n",
+ "NP_005311.1 MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELIT... \n",
+ "NP_006017.1 MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQPG... \n",
+ "NP_005309.1 MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSS... \n",
+ "AAO24603.1 "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1, 16)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"variant\"] == \"H2B.V\"].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/l_singh/.conda/envs/histdb_env/lib/python3.8/site-packages/Bio/Entrez/__init__.py:658: UserWarning: \n",
+ "Email address is not specified.\n",
+ "\n",
+ "To make use of NCBI's E-utilities, NCBI requires you to specify your\n",
+ "email address with each request. As an example, if your email address\n",
+ "is A.N.Other@example.com, you can specify it as follows:\n",
+ " from Bio import Entrez\n",
+ " Entrez.email = 'A.N.Other@example.com'\n",
+ "In case of excessive usage of the E-utilities, NCBI will attempt to contact\n",
+ "a user at the email address provided before blocking access to the\n",
+ "E-utilities.\n",
+ " warnings.warn(\n",
+ "/home/l_singh/.conda/envs/histdb_env/lib/python3.8/site-packages/Bio/GenBank/__init__.py:1143: BiopythonParserWarning: Dropping bond qualifier in feature location\n",
+ " warnings.warn(\n",
+ "/mnt/scratch/l_singh/hdb/project_dir/histonedb/CURATED_SET/curated_set_services.py:267: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " updating_data['accession'] = new_accessions\n"
+ ]
+ }
+ ],
+ "source": [
+ "# NCBI asks for a contact e-mail on E-utilities requests (Biopython warns when\n",
+ "# none is set). Take it from the ENTREZ_EMAIL environment variable instead of\n",
+ "# hard-coding an address; if the variable is unset, Entrez.email is left as it is.\n",
+ "Entrez.email = os.environ.get(\"ENTREZ_EMAIL\", Entrez.email)\n",
+ "\n",
+ "cs.update_accession_version()\n",
+ "# Rebuild the index from the accession column after the update\n",
+ "# (presumably because accession versions may have changed -- confirm).\n",
+ "cs.data = cs.data.set_index(cs.data.accession.values)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1, 16)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"variant\"] == \"H2B.V\"].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fetched taxid from NCBI 5702\n",
+ " changes to 5702\n",
+ " changes to Trypanosoma brucei brucei\n",
+ " changes to Euglenozoa\n",
+ " changes to Kinetoplastea\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Same object as `curated_set`; use the `cs` alias like every other cell.\n",
+ "cs.update_taxids(blank_data=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1, 16)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"variant\"] == \"H2B.V\"].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AAO24603.1 | \n",
+ " AAO24603.1 | \n",
+ " H2B | \n",
+ " H2B.V | \n",
+ " H2B.V | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 5702 | \n",
+ " Trypanosoma brucei brucei | \n",
+ " Euglenozoa | \n",
+ " Kinetoplastea | \n",
+ " | \n",
+ " | \n",
+ " 16303849 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant doublet gi ncbi_gene_id \\\n",
+ "AAO24603.1 AAO24603.1 H2B H2B.V H2B.V \n",
+ "\n",
+ " hgnc_gene_name taxonomy_id organism phylum \\\n",
+ "AAO24603.1 5702 Trypanosoma brucei brucei Euglenozoa \n",
+ "\n",
+ " class taxonomy_group info references sequence \n",
+ "AAO24603.1 Kinetoplastea 16303849 "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"variant\"] == \"H2B.V\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Updating sequences for H2B.V"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AAO24603.1 | \n",
+ " AAO24603.1 | \n",
+ " H2B | \n",
+ " H2B.V | \n",
+ " H2B.V | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 5702 | \n",
+ " Trypanosoma brucei brucei | \n",
+ " Euglenozoa | \n",
+ " Kinetoplastea | \n",
+ " | \n",
+ " | \n",
+ " 16303849 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant doublet gi ncbi_gene_id \\\n",
+ "AAO24603.1 AAO24603.1 H2B H2B.V H2B.V \n",
+ "\n",
+ " hgnc_gene_name taxonomy_id organism phylum \\\n",
+ "AAO24603.1 5702 Trypanosoma brucei brucei Euglenozoa \n",
+ "\n",
+ " class taxonomy_group info references sequence \n",
+ "AAO24603.1 Kinetoplastea 16303849 "
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data['sequence'] == '']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading FASTA SeqRecords by ACCESSIONs from NCBI\n",
+ "Fetching 1 seqs\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/l_singh/.conda/envs/histdb_env/lib/python3.8/site-packages/Bio/Entrez/__init__.py:658: UserWarning: \n",
+ "Email address is not specified.\n",
+ "\n",
+ "To make use of NCBI's E-utilities, NCBI requires you to specify your\n",
+ "email address with each request. As an example, if your email address\n",
+ "is A.N.Other@example.com, you can specify it as follows:\n",
+ " from Bio import Entrez\n",
+ " Entrez.email = 'A.N.Other@example.com'\n",
+ "In case of excessive usage of the E-utilities, NCBI will attempt to contact\n",
+ "a user at the email address provided before blocking access to the\n",
+ "E-utilities.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sequence for AAO24603.1 changes from to MPPTKGGKRPLPLGGKGKGKRPPGQTTKSSSSRKKSGARRGKKQQRWDLYIHRTLRQVYKRGTLSKAAVRVLSSFIEDMYGKIQAEAVHVACINNVKTLTAREIQTSARLLLPPELAKHAMSEGTKAVAKYNASREEAYSKVL\n",
+ "Sequences updated: 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "cs.update_sequence(blank_data=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " accession | \n",
+ " type | \n",
+ " variant_group | \n",
+ " variant | \n",
+ " doublet | \n",
+ " gi | \n",
+ " ncbi_gene_id | \n",
+ " hgnc_gene_name | \n",
+ " taxonomy_id | \n",
+ " organism | \n",
+ " phylum | \n",
+ " class | \n",
+ " taxonomy_group | \n",
+ " info | \n",
+ " references | \n",
+ " sequence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AAO24603.1 | \n",
+ " AAO24603.1 | \n",
+ " H2B | \n",
+ " H2B.V | \n",
+ " H2B.V | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 5702 | \n",
+ " Trypanosoma brucei brucei | \n",
+ " Euglenozoa | \n",
+ " Kinetoplastea | \n",
+ " | \n",
+ " | \n",
+ " 16303849 | \n",
+ " MPPTKGGKRPLPLGGKGKGKRPPGQTTKSSSSRKKSGARRGKKQQR... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " accession type variant_group variant doublet gi ncbi_gene_id \\\n",
+ "AAO24603.1 AAO24603.1 H2B H2B.V H2B.V \n",
+ "\n",
+ " hgnc_gene_name taxonomy_id organism phylum \\\n",
+ "AAO24603.1 5702 Trypanosoma brucei brucei Euglenozoa \n",
+ "\n",
+ " class taxonomy_group info references \\\n",
+ "AAO24603.1 Kinetoplastea 16303849 \n",
+ "\n",
+ " sequence \n",
+ "AAO24603.1 MPPTKGGKRPLPLGGKGKGKRPPGQTTKSSSSRKKSGARRGKKQQR... "
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cs.data[cs.data[\"variant\"] == \"H2B.V\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sequence self \n",
+ " other MAPKAEKKPAAKKPAATPPPEEEKEVVPPPPAEKKPKAGKKLPAAK...\n",
+ "Name: CUT18447.1, dtype: object\n",
+ "sequence self \n",
+ " other AATPPPEEEKEVVPPPAEKKPAEKKPKAGKKLPASKEGDAKKKKKS...\n",
+ "Name: CUT18448.1, dtype: object\n",
+ "sequence self \n",
+ " other MAPKAEKKPAAKKPAATPPPEEEKEVVPPPPAEKKPKAGKKLPAAK...\n",
+ "Name: CUT18451.1, dtype: object\n",
+ "sequence self \n",
+ " other MPPRRKKTAAGAAAGGKAAAAAVGKAGFMPPKKPKKGKKKTPIMRY...\n",
+ "Name: CUT18449.1, dtype: object\n",
+ "sequence self \n",
+ " other MAPKSEKKPAEKKPVAEKPAAEEEKKSAPAPAAAEKKPAEKKPKAG...\n",
+ "Name: CUT18445.1, dtype: object\n",
+ "sequence self \n",
+ " other AEKKPKAGKKVPASKEGEKKKKRSKKSVETYKIYIFKVLKQVHPDI...\n",
+ "Name: CUT18446.1, dtype: object\n",
+ "sequence self \n",
+ " other MAPKSEKKPAEKKPVAEKPAAEEEKKAAPAAAPAEKKAAEKKPKA\n",
+ "Name: CUT18452.1, dtype: object\n",
+ "sequence self \n",
+ " other MAPKKKPSKLVGTVTKTRKVTETQTLKVSLTKGLKPEDQQTTTNKF...\n",
+ "Name: CUT18450.1, dtype: object\n",
+ "cp histones.csv backups/histones.csv-Mar0624163102\n",
+ "Previous data backuped to backups/histones.csv-Mar0624163102\n",
+ "Results saved to histones.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "cs.save()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".conda-histdb_env",
+ "language": "python",
+ "name": "conda-env-.conda-histdb_env-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/CURATED_SET/histones.csv b/CURATED_SET/histones.csv
index 04261d7b..76cf6fb7 100644
--- a/CURATED_SET/histones.csv
+++ b/CURATED_SET/histones.csv
@@ -388,6 +388,7 @@ XP_678689.1,H2B,H2B.Z,H2B.Z,,68073549,,,5823,Plasmodium berghei ANKA,Apicomplexa
XP_001349046.1,H2B,H2B.Z,H2B.Z,,124511826,,,36329,Plasmodium falciparum 3D7,Apicomplexa,Aconoidasida,,,,MSGKGPAQKSQAAKKTAGKTLGPRHKRKRRTESFSLYIFKVLKQVHPETGVTKKSMNIMNSFINDIFDRLVTEATRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSAA
XP_002369740.1,H2B,H2B.Z,H2B.Z,,237840885,,,508771,Toxoplasma gondii ME49,Apicomplexa,Conoidasida,,,,MSGKGPAQKSQAAKKTAGKSLGPRYRRRKRTESFALYIYKVLKQVHPETGVSKKSMSIMNSFINDIFDRLADEAVRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGA
HISTDB_H2B_Z_0,H2B,H2B.Z,H2B.Z,,,,,27996,Cytauxzoon felis,Apicomplexa,Aconoidasida,,,DOI:10.5772/intechopen.81409,MSGKVPSTKSQAAKKTAGKTLGVRYRRKKRIESFALYIYKVLKQVHPETGVSKKSMSIMNSFINDIFDRLALEATRLIRYNKKSTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGV
+AAO24603.1,H2B,H2B.V,H2B.V,,,,,5702,Trypanosoma brucei brucei,Euglenozoa,Kinetoplastea,,,16303849,MPPTKGGKRPLPLGGKGKGKRPPGQTTKSSSSRKKSGARRGKKQQRWDLYIHRTLRQVYKRGTLSKAAVRVLSSFIEDMYGKIQAEAVHVACINNVKTLTAREIQTSARLLLPPELAKHAMSEGTKAVAKYNASREEAYSKVL
P02291.2,H2B,sperm_H2B_(Echinoidea),sperm_H2B_(Echinoidea),,108885304,,,7658,Parechinus angulosus,Echinodermata,Echinoidea,,,,MPRSPAKTSPRKGSPRKGSPSRKASPKRGGKGAKRAGKGGRRRRVVKRRRRRRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIAGEASRLTSANRRSTVSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR
Q27749.3,H2B,sperm_H2B_(Echinoidea),sperm_H2B_(Echinoidea),,74767039,,,7660,Psammechinus miliaris,Echinodermata,Echinoidea,,,,MPSQKSPTKRSPTKRSPQKGGKGAKRGGKAGKRRRGVAVKRRRRRRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIASEAGRLTTYNRRNTVSSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR
Q27750.3,H2B,sperm_H2B_(Echinoidea),sperm_H2B_(Echinoidea),,108860775,,,7660,Psammechinus miliaris,Echinodermata,Echinoidea,,,,MPKSPSKSSPRKGSPRKGSPRKGSPKRGGKGAKRAGKGGRRNVVKRRRRRRESYGIYIYKVLKQVHPDTGISSRGMSVMNSFVNDVFERIAGEASRLTSANRRSTISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTARR
@@ -450,9 +451,14 @@ XP_006969783.1,H3,H3.3,H3.3,,589115521,,,431241,Trichoderma reesei QM6a,Ascomyco
NP_009564.1,H3,H3.3,H3.3,,6319482,,,559292,Saccharomyces cerevisiae S288C,Ascomycota,Saccharomycetes,,,,MARTKQTARKSTGGKAPRKQLASKAARKSAPSTGGVKKPHRYKPGTVALREIRRFQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQESVEAYLVSLFEDTNLAAIHAKRVTIQKKDIKLARRLRGERS
NP_002098.1,H3,H3.3,H3.3_(Homo_sapiens),,,3020.0,H3-3A,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,19412883,MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
NP_005315.1,H3,H3.3,H3.3_(Homo_sapiens),,,3021.0,H3-3B,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,19412883,MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
-NP_001013721.2,H3,H3.5,H3.5_(Homo_sapiens)__???,,,440093.0,H3-5,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,21274551,MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGVKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
-NP_001342338.1,H3,H3.7(?),H3.7(?)_(Homo_sapiens)__???,,,440686.0,H3-7,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQEFKTDLRFQSSAVMALQEAREAYLVGLFEDTNLCAIHAKRVTIMPKDIQLVSRIRGERA
-XP_003804825.1,H3,TS_H3.4,TS_H3.4__???,,397466137,,,9597,Pan paniscus,Chordata,Mammalia,,,,MARTKQTARKSTGGKAPRKQLVTKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTIMPKDIQLARRIRGERA
+NP_001358848.1,H3,H3.Y,H3.Y.2_(Homo_sapiens)__???,,,340096.0,H3Y2,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,20819935,MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRGEGAGEPTLLGNLAL
+XP_003954426.1,H3,H3.5,H3.5__???,,410046862,,,9598,Pan troglodytes,Chordata,Mammalia,,,,MARTKQTARKSTGGKAPRKQLATKAARKSTPSTXGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
+HISTDB_H3_Y_0,H3,H3.Y,H3.Y__???,,NOGI,,,9544,Macaca mulatta,Chordata,Mammalia,,,,ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQLLLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGAXEPTLLGNVAL
+HISTDB_H3_Y_1,H3,H3.Y,H3.Y__???,,NOGI,,,9544,Macaca mulatta,Chordata,Mammalia,,,,ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGA
+HISTDB_H3_Y_2,H3,H3.Y,H3.Y__???,,NOGI,,,9598,Pan troglodytes,Chordata,Mammalia,,,,ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP
+HISTDB_H3_Y_3,H3,H3.Y,H3.Y__???,,NOGI,,,9598,Pan troglodytes,Chordata,Mammalia,,,,ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPQDMQLARRLRGEGAREPTLLGNLAL
+NP_001800.1,H3,cenH3,cenH3_(Homo_sapiens)__???,,,1058.0,CENPA,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,23324462,MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHLLIRKLPFSRLAREICVKFTRGVDFNWQAQALLALQEAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG
+NP_001035891.1,H3,cenH3,cenH3_(Homo_sapiens)__???,,,1058.0,CENPA,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,23324462,MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHLLIRKLPFSRLAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG
NP_563627.1,H3,cenH3,cenH3__???,,18378832,,,3702,Arabidopsis thaliana,Streptophyta,Magnoliopsida,,,,MARTKHRVTRSQPRNQTDAAGASSSQAAGPTTTPTRRGGEGGDNTQQTNPTTSPATGTRRGAKRSRQAMPRGSQKKSYRYRPGTVALKEIRHFQKQTNLLIPAASFIREVRSITHMLAPPQINRWTAEALVALQEAAEDYLVGLFSDSMLCAIHARRVTLMRKDFELARRLGGKGRPW
NP_596473.1,H3,cenH3,cenH3__???,,19113265,,,4896,Schizosaccharomyces pombe,Ascomycota,Schizosaccharomycetes,,,,MAKKSLMAEPGDPIPRPRKKRYRPGTTALREIRKYQRSTDLLIQRLPFSRIVREISSEFVANFSTDVGLRWQSTALQCLQEAAEAFLVHLFEDTNLCAIHAKRVTIMQRDMQLARRIRGA
NP_499128.1,H3,cenH3,cenH3__???,,17553736,,,6239,Caenorhabditis elegans,Nematoda,Chromadorea,,,,MADDTPIIEEIAEQNESVTRIMQRLKHDMQRVTSVPGFNTSAAGVNDLIDILNQYKKELEDDAANDYTEAHIHKIRLVTGKRNQYVLKLKQAEDEYHARKEQARRRASSMDFTVGRNSTNLVDYSHGRHHMPSYRRHDSSDEENYSMDGTNGDGNRAGPSNPDRGNRTGPSSSDRVRMRAGRNRVTKTRRYRPGQKALEEIRKYQKTEDLLIQKAPFARLVREIMQTSTPFGADCRIRSDAISALQEAAEAFLVEMFEGSSLISTHAKRVTLMTTDIQLYRRLCLRHL
@@ -466,16 +472,11 @@ XP_002287626.1,H3,cenH3,cenH3__???,,223995905,,,296543,Thalassiosira pseudonana
XP_001011273.1,H3,cenH3,cenH3__???,,118356028,,,312017,Tetrahymena thermophila SB210,Ciliophora,Oligohymenophorea,,,,MARKAYQPKRRSNSNQNQQRSDSLKKNKQDNLRSKSAGNQQGNEKNKKDIQDQRNKASTKKKRESSGEKYESARDKVIRRFRPGDNALKQLRQYNQTPSLLIRKLPFQRLIREISTRMTEEDSLRWTSFALVLLQTVVEDYMVSFFEDANACALHAKRVTLMSKDLALAARIRGQKNVTGIFIPTKK
XP_002767160.1,H3,cenH3,cenH3__???,,294874934,,,423536,Perkinsus marinus ATCC 50983,Perkinsozoa,None,,,,MVGVENLGVGFDELLTRGGCGVRDDAVEIAFRGVEGLEDVLKDYMVRNKDGKILSVARPVDAEHSEELLGLAAAIGRSYGSLICAAAHNGGVRLPVGKGDDDGDSNNSSDEEADSGCGGAAEGDEAGDVGAGAGDVGDGAGDGAAEGDGAGDAGNGAGDVGDVGDGAGDGAAEGDGAGDGAADDAHGAGDDGEGSRNGGPPLVVQMMVLVMMNGNGNGADDGGNGVDDGEGDGDGHQGNVEGDGHGDGQDDGDGEGSVDSSGNGGDSEPSLEVSREGSENRPKLLPPVEGRTSSSAAAIAAPPVPSAGSHIITGSGGKVPTAGKRPRQFVKKSSAKKGRYRPGTVALREIRRHQEITDPLIEKRCFQALARSLSREVEASMRWQPQSLVALQEASESFIVGMLEASQLLAVHGRRITLMEKDVKMWTRLAAMFGSTTFMDQEKQVGGT
NP_012875.2,H3,cenH3,cenH3__???,,27808712,,,559292,Saccharomyces cerevisiae S288C,Ascomycota,Saccharomycetes,,,,MSSKQQWVSSAIQSDSSGRSLSNVNRLAGDQQSINDRALSLLQRTRATKNLFPRREERRRYESSKSDLDIETDYEDQAGNLEIETENEEEAEMETEVPAPVRTHSYALDRYVRQKRREKQRKQSLKRVEKKYTPSELALYEIRKYQRSTDLLISKIPFARLVKEVTDEFTTKDQDLRWQSMAIMALQEASEAYLVGLLEHTNLLALHAKRITIMKKDMQLARRIRGQFI
-HISTDB_H3_Y_0,H3,H3.Y,H3.Y__???,,NOGI,,,9544,Macaca mulatta,Chordata,Mammalia,,,,ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQLLLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGAXEPTLLGNVAL
-HISTDB_H3_Y_1,H3,H3.Y,H3.Y__???,,NOGI,,,9544,Macaca mulatta,Chordata,Mammalia,,,,ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGA
-HISTDB_H3_Y_2,H3,H3.Y,H3.Y__???,,NOGI,,,9598,Pan troglodytes,Chordata,Mammalia,,,,ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP
-HISTDB_H3_Y_3,H3,H3.Y,H3.Y__???,,NOGI,,,9598,Pan troglodytes,Chordata,Mammalia,,,,ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPQDMQLARRLRGEGAREPTLLGNLAL
-NP_001342187.1,H3,H3.Y,H3.Y.1_(Homo_sapiens)__???,,,391769.0,H3Y1,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,20819935,MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP
NP_003484.1,H3,H3.4,H3.4_(Homo_sapiens)__???,,,8290.0,H3-4,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,8986613,MARTKQTARKSTGGKAPRKQLATKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTIMPKDIQLARRIRGERA
-NP_001358848.1,H3,H3.Y,H3.Y.2_(Homo_sapiens)__???,,,340096.0,H3Y2,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,20819935,MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRGEGAGEPTLLGNLAL
-XP_003954426.1,H3,H3.5,H3.5__???,,410046862,,,9598,Pan troglodytes,Chordata,Mammalia,,,,MARTKQTARKSTGGKAPRKQLATKAARKSTPSTXGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
-NP_001800.1,H3,cenH3,cenH3_(Homo_sapiens)__???,,,1058.0,CENPA,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,23324462,MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHLLIRKLPFSRLAREICVKFTRGVDFNWQAQALLALQEAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG
-NP_001035891.1,H3,cenH3,cenH3_(Homo_sapiens)__???,,,1058.0,CENPA,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,23324462,MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHLLIRKLPFSRLAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG
+NP_001013721.2,H3,H3.5,H3.5_(Homo_sapiens)__???,,,440093.0,H3-5,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,21274551,MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGVKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA
+XP_003804825.1,H3,TS_H3.4,TS_H3.4__???,,397466137,,,9597,Pan paniscus,Chordata,Mammalia,,,,MARTKQTARKSTGGKAPRKQLVTKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTIMPKDIQLARRIRGERA
+NP_001342187.1,H3,H3.Y,H3.Y.1_(Homo_sapiens)__???,,,391769.0,H3Y1,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,20819935,MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP
+NP_001342338.1,H3,H3.7(?),H3.7(?)_(Homo_sapiens)__???,,,440686.0,H3-7,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQEFKTDLRFQSSAVMALQEAREAYLVGLFEDTNLCAIHAKRVTIMPKDIQLVSRIRGERA
NP_180441.1,H4,cH4,cH4,,15226944,,,3702,Arabidopsis thaliana,Streptophyta,Magnoliopsida,,,,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG
NP_001131585.1,H4,cH4,cH4,,212722314,,,4577,Zea mays,Streptophyta,Magnoliopsida,,,,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG
NP_492641.1,H4,cH4,cH4,,17509199,,,6239,Caenorhabditis elegans,Nematoda,Chromadorea,,,,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG
@@ -488,7 +489,6 @@ XP_012928609.2,H4,cH4,cH4,,861442511,,,10181,Heterocephalus glaber,Chordata,Mamm
XP_951561.1,H4,cH4,cH4,,84043542,,,185431,Trypanosoma brucei brucei TREU927,Euglenozoa,Kinetoplastea,,,,MAKGKRVGESKGAQKRQKKVLRDNVRGITRGSIRRLARRAGVKRISGVIYDEVRGVLKTFVESIVRDAGAYTEYSRKKTVTAAHVVFALRKRGKVLYGYD
XP_001016593.1,H4,cH4,cH4,,118366755,,,312017,Tetrahymena thermophila SB210,Ciliophora,Oligohymenophorea,,,,MAGGKGGKGMGKVGAKRHSRKSNKASIEGITKPAIRRLARRGGVKRISSFIYDDSRQVLKSFLENVVRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG
NP_009563.1,H4,cH4,cH4,,6319481,,,559292,Saccharomyces cerevisiae S288C,Ascomycota,Saccharomycetes,,,,MSGRGKGGKGLGKGGAKRHRKILRDNIQGITKPAIRRLARRGGVKRISGLIYEEVRAVLKSFLESVIRDSVTYTEHAKRKTVTSLDVVYALKRQGRTLYGFGG
-NP_003538.1,H4,cH4(?),cH4(?)_(Homo_sapiens)__???,,,8369.0,H4C7,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSVRGKAGKGLGKGGAKCHRKVLSDNIQGITKCTIRRLARHGGVKRILGLIYEETRRVFKVFLENVIWYAVTNTEHAKRKTVTAMAVVYVLKRQGRTL
NP_003529.1,H4,cH4,cH4_(Homo_sapiens)__???,,,8359.0,H4C1,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
NP_003535.1,H4,cH4,cH4_(Homo_sapiens)__???,,,8366.0,H4C2,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
NP_003533.1,H4,cH4,cH4_(Homo_sapiens)__???,,,8364.0,H4C3,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
@@ -503,6 +503,7 @@ NP_003537.1,H4,cH4,cH4_(Homo_sapiens)__???,,,8368.0,H4C13,9606,Homo sapiens,Chor
NP_003539.1,H4,cH4,cH4_(Homo_sapiens)__???,,,8370.0,H4C14,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
NP_001029249.1,H4,cH4,cH4_(Homo_sapiens)__???,,,554313.0,H4C15,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
NP_778224.1,H4,cH4,cH4_(Homo_sapiens)__???,,,121504.0,H4C16,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG
+NP_003538.1,H4,cH4(?),cH4(?)_(Homo_sapiens)__???,,,8369.0,H4C7,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,12408966,MSVRGKAGKGLGKGGAKCHRKVLSDNIQGITKCTIRRLARHGGVKRILGLIYEETRRVFKVFLENVIWYAVTNTEHAKRKTVTAMAVVYVLKRQGRTL
NP_172161.1,H1,generic_H1,generic_H1,,15222199,,,3702,Arabidopsis thaliana,Streptophyta,Magnoliopsida,,,,MSEVEIENAATIEGNTAADAPVTDAAVEKKPAAKGRKTKNVKEVKEKKTVAAAPKKRTVSSHPTYEEMIKDAIVTLKERTGSSQYAIQKFIEEKRKELPPTFRKLLLLNLKRLVASGKLVKVKASFKLPSASAKASSPKAAAEKSAPAKKKPATVAVTKAKRKVAAASKAKKTIAVKPKTAAAKKVTAKAKAKPVPRATAAATKRKAVDAKPKAKARPAKAAKTAKVTSPAKKAVAATKKVATVATKKKTPVKKVVKPKTVKSPAKRASSRVKK
P23444.2,H1,generic_H1,generic_H1,,121950,,,4577,Zea mays,Streptophyta,Magnoliopsida,,,,MATDVTETPAPLVDAAPEAPADAPAAPAADANAAKAKKATAPKKRASPTHLPYAEMVSEAITSLKERTGSSSYAIAKFVEDKHKAKLPPNFRKLLNVQLKKLVAGGKLTKVKNSYKLSSATKPNPKPKAAPKKPKTGAKKPKAAAKPKAKTPAKAKPATKPKPAAKPKAVVKPKTPAKPKAKPAAKAKPKTAGAKPKPLAKKAGRAKAAKTSAKDTPGKKAPAKKAAPSKKAATPVRKAPSRKAKK
O17536.3,H1,generic_H1,generic_H1,,54035964,,,6239,Caenorhabditis elegans,Nematoda,Chromadorea,,,,MSDVAVAADTTETPAAPTKASKATKASKATKASKATKAKTTKVPMVKADAAHPPFINMVTEAISSIKDRKGPSRAAILKYITTKYTLGDQANKINAHLRKALNKGLESNAFVQASGNGANGRFRLAEKTASVAKSPAAAKKDATGEKKATTTVAKKAATGEKKATTTVAKKAATGEKKATTTVAKKAAAGDKAKKTEVKVKKVKSPKKIAKSPVNKVTKSPVKKIAKSSSMKAAPKKAAAKPAKKAPAAAPEA
@@ -549,14 +550,14 @@ NP_001080265.1,H1,H1.10,H1.10,,147898445,,,8355,Xenopus laevis,Chordata,Amphibia
ACO10502.1,H1,H1.10,H1.10,,225709312,,,217165,Caligus rogercresseyi,Arthropoda,Hexanauplia,,,,MVKSEVEVTINAEEAPVASSLKPAKKKKNKKKKNKPGKYSVLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMGANGSFRLNEDKFAKGVPKKTQSKPAKNTTKTAKASTTKKATVVKAKSSPKKAPDAKMPAAKLKKLGVKKVSAAQKNKKPKKASKPPAKSPRKK
NP_015198.1,H1,scH1,scH1,,6325130,,,559292,Saccharomyces cerevisiae S288C,Ascomycota,Saccharomycetes,,,,MAPKKSTTKTTSKGKKPATSKGKEKSTSKAAIKKTTAKKEEASSKSYRELIIEGLTALKERKGSSRPALKKFIKENYPIVGSASNFDLYFNNAIKKGVEAGDFEQPKGPAGAVKLAKKKSPEVKKEKEVSPKPKQAATSVSATASKAKAASTKLAPKKVVKKKSPTVTAKKASSPSSLTYKEMILKSMPQLNDGKGSSRIVLKKYVKDTFSSKLKTSSNFDYLFNSAIKKCVENGELVQPKGPSGIIKLNKKKVKLST
XP_011105792.1,H1,scH1,scH1,,748455219,,,1160507,Saccharomyces arboricola H-6,Ascomycota,Saccharomycetes,,,,MAPKKTSTKTTTTNKGKKPVTSKGKDKPVIKTAVKKNAAKKEEPSSKSYKELIVEGLAALKERKGSSRPALKKFIKENYPLVGSTSNFDLYFNNAIKKGVETGDFEQPKGPAGTLKLAKKKSPELKKETSPKPKQAAAATTTTTTTTPTSLKAKAKTASKKQAPKKVVKKKVPAVAVIPKKTSSPSALTYKEMILKSMPELNDGKGSSRIVLKKYVKDTFSSKLKTSSNFDYLFNSAIKKCVENGELVQPKGPSGIIKINKKKAKLST
-NP_005311.1,H1,H1.3,H1.3_(Homo_sapiens)__???,,,3007.0,H1-3,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELITKAVAASKERSGVSLAALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEGKPKAKKAGAAKPRKPAGAAKKPKKVAGAATPKKSIKKTPKKVKKPATAAGTKKVAKSAKKVKTPQPKKAAKSPAKAKAPKPKAAKPKSGKPKVTKAKKAAPKKK
NP_861453.1,H1,H1.7,H1.7_(Homo_sapiens)__???,,,341567.0,H1-7,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MEQALTGEAQSRWPRRGGSGAMAEAPGPSGESRGHSATQLPAEKTVGGPSRGCSSSVLRVSQLVLQAISTHKGLTLAALKKELRNAGYEVRRKSGRHEAPRGQAKATLLRVSGSDAAGYFRVWKVPKPRRKPGRARQEEGTRAPWRTPAAPRSSRRRRQPLRKAARKAREVWRRNARAKAKANARARRTRRARPRAKEPPCARAKEEAGATAADEGRGQAVKEDTTPRSGKDKRRSSKPREEKQEPKKPAQRTIQ
+NP_005314.2,H1,H1.6,H1.6_(Homo_sapiens)__???,,,3010.0,H1-6,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETVPAASASAGVAAMEKLPTKKRGRKPAGLISASRKVPNLSVSKLITEALSVSQERVGMSLVALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKVIPKSTRSKAKKSVSAKTKKLVLSRDSKSPKTAKTNKRAKKPRATTPKTVRSGRKAKGAKGKQQQKSPVKARASKSKLTQHHEVNVRKATSKK
+NP_005310.1,H1,H1.2,H1.2_(Homo_sapiens)__???,,,3006.0,H1-2,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELITKAVAASKERSGVSLAALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKVKKAGGTKPKKPVGAAKKPKKAAGGATPKKSAKKTPKKAKKPAAATVTKKVAKSPKKAKVAKPKKAAKSAAKAVKPKAAKPKVVKPKKAAPKKK
NP_005316.1,H1,H1.1,H1.1_(Homo_sapiens)__???,,,3024.0,H1-1,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETVPPAPAASAAPEKPLAGKKAKKPAKAAAASKKKPAGPSVSELIVQAASSSKERGGVSLAALKKALAAAGYDVEKNNSRIKLGIKSLVSKGTLVQTKGTGASGSFKLNKKASSVETKPGASKVATKTKATGASKKLKKATGASKKSVKTPKKAKKPAATRKSSKNPKKPKTVKPKKVAKSPAKAKAVKPKAAKARVTKPKTAKPKKAAPKKK
NP_005313.1,H1,H1.5,H1.5_(Homo_sapiens)__???,,,3009.0,H1-5,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPAETATPAPVEKSPAKKKATKKAAGAGAAKRKATGPPVSELITKAVAASKERNGLSLAALKKALAAGGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKAKKAGAAKAKKPAGATPKKAKKAAGAKKAVKKTPKKAKKPAAAGVKKVAKSPKKAKAAAKPKKATKSPAKPKAVKPKAAKPKAAKPKAAKPKAAKAKKAAAKKK
NP_722575.1,H1,H1.8,H1.8_(Homo_sapiens)__???,,,132243.0,H1-8,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MAPGSVTSDISPSSTSTAGSSRSPESEKPGPSHGGVPPGGPSHSSLPVGRRHPPVLRMVLEALQAGEQRRGTSVAAIKLYILHKYPTVDVLRFKYLLKQALATGMRRGLLARPLNSKARGATGSFKLVPKHKKKIQPRKMAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRPAKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSRKAKAKGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGPNTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA
NP_001295191.1,H1,H1.8,H1.8_(Homo_sapiens)__???,,,132243.0,H1-8,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRPAKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSRKAKAKGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGPNTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA
NP_005312.1,H1,H1.4,H1.4_(Homo_sapiens)__???,,,3008.0,H1-4,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELITKAVAASKERSGVSLAALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKAKKAGAAKAKKPAGAAKKPKKATGAATPKKSAKKTPKKAKKPAAAAGAKKAKSPKKAKAAKPKKAPKSPAKAKAVKPKAAKPKTAKPKAAKPKKAAAKKK
-NP_005309.1,H1,H1.0,H1.0_(Homo_sapiens)__???,,,3005.0,H1-0,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKVGENADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKAASKAPTKKPKATPVKKAKKKLAATPKKAKKPKTVKAKPVKASKPKKAKPVKPKAKSSAKRAGKKK
-NP_005310.1,H1,H1.2,H1.2_(Homo_sapiens)__???,,,3006.0,H1-2,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELITKAVAASKERSGVSLAALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKVKKAGGTKPKKPVGAAKKPKKAAGGATPKKSAKKTPKKAKKPAAATVTKKVAKSPKKAKVAKPKKAAKSAAKAVKPKAAKPKVVKPKKAAPKKK
-NP_005314.2,H1,H1.6,H1.6_(Homo_sapiens)__???,,,3010.0,H1-6,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETVPAASASAGVAAMEKLPTKKRGRKPAGLISASRKVPNLSVSKLITEALSVSQERVGMSLVALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKVIPKSTRSKAKKSVSAKTKKLVLSRDSKSPKTAKTNKRAKKPRATTPKTVRSGRKAKGAKGKQQQKSPVKARASKSKLTQHHEVNVRKATSKK
+NP_005311.1,H1,H1.3,H1.3_(Homo_sapiens)__???,,,3007.0,H1-3,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELITKAVAASKERSGVSLAALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEGKPKAKKAGAAKPRKPAGAAKKPKKVAGAATPKKSIKKTPKKVKKPATAAGTKKVAKSAKKVKTPQPKKAAKSPAKAKAPKPKAAKPKSGKPKVTKAKKAAPKKK
NP_006017.1,H1,H1.10,H1.10_(Homo_sapiens)__???,,,8971.0,H1-10,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQPGKYSQLVVETIRRLGERNGSSLAKIYTEAKKVPWFDQQNGRTYLKYSIKALVQNDTLLQVKGTGANGSFKLNRKKLEGGGERRGAPAAATAPAPTAHKAKKAAPGAAGSRRADKKPARGQKPEQRSHKKGAGAKKDKGGKAKKTAAAGGKKVKKAAKPSVPKVPKGRK
+NP_005309.1,H1,H1.0,H1.0_(Homo_sapiens)__???,,,3005.0,H1-0,9606,Homo sapiens,Chordata,Mammalia,Mammalia,,26689747,MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKVGENADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKAASKAPTKKPKATPVKKAKKKLAATPKKAKKPKTVKAKPVKASKPKKAKPVKPKAKSSAKRAGKKK