From 06a328cdbcb9f1088693a96a1fcd1dba771f04e9 Mon Sep 17 00:00:00 2001 From: atom-zh Date: Thu, 27 May 2021 23:39:23 +0800 Subject: [PATCH 1/5] upload Initial code --- .gitignore | 106 +++ config/__init__.py | 5 + config/logger_config.py | 48 ++ config/path_config.py | 54 ++ data/01-anhui.xlsx | Bin 0 -> 50784 bytes data/labels.csv | 84 +++ data/train.csv | 343 ++++++++++ data/valid.csv | 63 ++ data_preprocess/__init__.py | 5 + data_preprocess/data_excel2csv.py | 65 ++ data_preprocess/data_split.py | 132 ++++ data_preprocess/generator_preprocess.py | 364 ++++++++++ data_preprocess/text_preprocess.py | 860 ++++++++++++++++++++++++ textCNN/__init__.py | 5 + textCNN/graph.py | 176 +++++ textCNN/predict.py | 130 ++++ textCNN/train.py | 89 +++ 17 files changed, 2529 insertions(+) create mode 100644 .gitignore create mode 100644 config/__init__.py create mode 100644 config/logger_config.py create mode 100644 config/path_config.py create mode 100644 data/01-anhui.xlsx create mode 100644 data/labels.csv create mode 100644 data/train.csv create mode 100644 data/valid.csv create mode 100644 data_preprocess/__init__.py create mode 100644 data_preprocess/data_excel2csv.py create mode 100644 data_preprocess/data_split.py create mode 100644 data_preprocess/generator_preprocess.py create mode 100644 data_preprocess/text_preprocess.py create mode 100644 textCNN/__init__.py create mode 100644 textCNN/graph.py create mode 100644 textCNN/predict.py create mode 100644 textCNN/train.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c18571 --- /dev/null +++ b/.gitignore @@ -0,0 +1,106 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +.idea/ + + +# PyInstaller +# Usually these files are written by a python script from a 
template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e344264 --- /dev/null +++ b/config/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/5 21:04 +# @author :Mo +# @function : \ No newline at end of file diff --git a/config/logger_config.py b/config/logger_config.py new file mode 100644 index 0000000..02d0d4a --- /dev/null +++ b/config/logger_config.py @@ -0,0 +1,48 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/28 0:24 +# @author :Mo +# @function :logger + + +from keras_textclassification.conf.path_config import path_root +from logging.handlers import RotatingFileHandler +import logging +import time +import os + + +# log目录地址 +path_logs = path_root + '/logs' +if not os.path.exists(path_logs): + os.mkdir(path_logs) +# 全局日志格式 +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') +# 定义一个日志记录器 +logger = logging.getLogger("Keras-TextClassification") +logger.setLevel(level = logging.INFO) 
+# 日志文件名,为启动时的日期 +log_file_name = time.strftime('%Y-%m-%d', time.localtime(time.time())) + ".log" +log_name_day = os.path.join(path_logs, log_file_name) +# 文件输出, 定义一个RotatingFileHandler,最多备份32个日志文件,每个日志文件最大32K +rHandler = RotatingFileHandler(log_name_day, maxBytes = 32*1024, backupCount = 32) +rHandler.setLevel(logging.INFO) +# 日志输出格式 +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +rHandler.setFormatter(formatter) +# 控制台输出 +console = logging.StreamHandler() +console.setLevel(logging.INFO) +console.setFormatter(formatter) +# logger加到handel里边 +logger.addHandler(rHandler) +logger.addHandler(console) +# 所有文件共用一个logger +def get_logger_root(): + return logging.getLogger("Keras-TextClassification") + + +if __name__ == '__main__': + logger = get_logger_root() + logger.info("test") diff --git a/config/path_config.py b/config/path_config.py new file mode 100644 index 0000000..0d078f1 --- /dev/null +++ b/config/path_config.py @@ -0,0 +1,54 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/5 21:04 +# @author :Mo +# @function :file of path + +import os + +# 项目的根目录 +path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +path_root = path_root.replace('\\', '/') + +# path of embedding +path_embedding_random_char = path_root + '/data/embeddings/term_char.txt' +path_embedding_random_word = path_root + '/data/embeddings/term_word.txt' +path_embedding_bert = path_root + '/data/embeddings/chinese_L-12_H-768_A-12/' +path_embedding_xlnet = path_root + '/data/embeddings/chinese_xlnet_mid_L-24_H-768_A-12/' +path_embedding_albert = path_root + '/data/embeddings/albert_base_zh' +path_embedding_vector_word2vec_char = path_root + '/data/embeddings/w2v_model_wiki_char.vec' +path_embedding_vector_word2vec_word = path_root + '/data/embeddings/w2v_model_merge_short.vec' + +# classify data of baidu qa 2019 +path_baidu_qa_2019_train = path_root + '/data/baidu_qa_2019/baike_qa_train.csv' +path_baidu_qa_2019_valid = 
path_root + '/data/baidu_qa_2019/baike_qa_valid.csv' + +# 今日头条新闻多标签分类 +path_byte_multi_news_train = path_root + '/data/byte_multi_news/train.csv' +path_byte_multi_news_valid = path_root + '/data/byte_multi_news/valid.csv' +path_byte_multi_news_label = path_root + '/data/byte_multi_news/labels.csv' + +# classify data of baidu qa 2019 +path_sim_webank_train = path_root + '/data/sim_webank/train.csv' +path_sim_webank_valid = path_root + '/data/sim_webank/valid.csv' +path_sim_webank_test = path_root + '/data/sim_webank/test.csv' + +# classfiy multi labels 2021 +path_multi_label_train = path_root + '/data/multi_label/train.csv' +path_multi_label_valid = path_root + '/data/multi_label/valid.csv' +path_multi_label_labels = path_root + '/data/multi_label/labels.csv' + +# 路径抽象层 +path_train = path_multi_label_train +path_valid = path_multi_label_valid +path_label = path_multi_label_labels + +# fast_text config +# 模型目录 +path_model_dir = path_root + "/data/model/fast_text/" +# 语料地址 +path_model = path_root + '/data/model/fast_text/model_fast_text.h5' +# 超参数保存地址 +path_hyper_parameters = path_root + '/data/model/fast_text/hyper_parameters.json' +# embedding微调保存地址 +path_fineture = path_root + "/data/model/fast_text/embedding_trainable.h5" diff --git a/data/01-anhui.xlsx b/data/01-anhui.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..29b744be848855311fc875794e6e18e77342f6f7 GIT binary patch literal 50784 zcmZsCbwHKh(k|V?rZ%QI_wt9eX3d)QtXVV9%z9N*L`B0vy8j7NSrNPc{eOP2fj^kro2fb3J2-Qx0b(S; zKVUtGwLG7zf+8a!@uDChf&N>}#KD2n-PYzqJVF5#Mk0L{cnA7y&%;5l!Oa>77sAwY zrRK5!(Wyj~usMFo!RZ1y$Fw`P*Sf~zAs#Pla(Zv{cfI`xE5|+~qi!jvfw?5nQ3e}q zp=hfk(59Om6T^bdqRjg49bJh&mJ8?Unfr1DYV2c_a8l+qm*}lGQRwxdGNF!j$}Vhw zD8n6(+24FrVCS#-U5p%LlVK5t_YA8z*lDdD*ZOT%{rTeLWO)pi0cRs83a;;tK}B+p zdr=b`+1kfEo_a=SrG9HV+_R>Ihq&kRM1*7Pv89y|X~s47Ct{_LF` z|MLU=c;)8eVE2TRbNT7zc13FUmf9aw*fy&A4n|904`KA{uA;ujT^`zUv3^LZp`F5# zUCR8Cd`QI~B-?LSnzmm)zq+tHK>y$9mM3G;CS$pzg=Stze>9Yq5MfB`&7=iW<@|Y5;+tZ^*rf2Mz+!~e@7ylCHWs3 
z3u{)Z29mG_Z`6pn0iinbm-Xty7aw1(h90OkU}W_ikm@X@8OJQ+gP$=FQpW|x4!n6+ zPD95ZibG;$2@A)%JrIO`y`39pf1<+ltNnn7dL{caI&IHHTdfmsgDqvfHEs9ilXKgP z^PjPg6@KkTpt5lzSf6(q*hZY~&ZlhSOwgff`QQ2PXO%k0qlwY7xu2sF7~$f=oiI{D|SOk`u74C zA#2&p%9$0^wqe%KK?L|&EvdY}aFjJNTm*ae?E11+5Ro*rcl~=e3tn@F+9yjavhx%l zx}kI+IS!?%Z6Exc#&=0##;+?fzdYC5*k|N0AZvU+_l7r!>h%+jeJopNcLzINW1C*_ z@*fx?-$blOO5bh{ImRl_yGiKCC@H)RV}c#r_$#Vq^J!erm}z za);O#rNZfTVs~RivUV?)q)T!N3nN6^<;FHnpaQtM+dPDn|GxnKKLYRVo!&WH zSy;II-{B`4d+`gI7ZOqi2`Uo!|B-!|0=qw{=(cx>jE(V6FK;q_-HB!Hg=KyHRH8Il zv+nZzrQXT!S)VK)?^O+k@PNTSeGU$|7JV+e&-^Z?supfK?#71hn%b{cHpdok*9jJH z_qVxd{Z9UNh~8fR^}D^iyB77mXjB!wxf=7kJwG@if3wUbdiVQRj;P=5AFlSx-k_)HK~nNc1xSFi}4WgZ?TKF$9s1N z-|tL9@7|8y%%6#R-(7UHwA4z{itf4Ho;G&)Tr_svUa!CRqiDlIqvka7xxQW&^}W5` z?k}+y^}G7}y<>-Azr*+P_xFzL?z28q%1bWa%kzGtwyTzIuG@?CZ3hoOUrQsm*}L1@ z)kR5jEn|y!oBaf| ze&@?C_p(#wO%ca;CyFNoZt+zOAA9aZtIj72HvMjnduc^(59aUAmyg0!MX#sM{LZg8 zjoMn;GHvg^mvr3R^zYprZ;pNYVD{(ieU5K+N26cI`N0+0)L#2JFzs@tW#;nB&F)yv z-4$l!##E=!A%EQIV!#Wdju+XdcOUdPk40Z}T&=XIY;TPs<55%^P2LLOQ9rF}c(-t+ z$@BAW`c~-HrP-(1;Pft?l7Qk1p5*D`945C*Kf2=80>RbZ`O#47Cg1g#+?S)xXD@EX zb~{$bRE_#N4EKmmi^8i>6UsD?A+~O zoI2`qJ|jBwG&X7p5Z)LFs?d zBlCWpgm`ipEs909A=%cwcrtzEIHEa3VvTWYGvA_ z`L!!ga(*yBC~o?q#Dv+Yx=>L#Qjv*=3+409uK?`anVD|~ElO0Qe@>dnzuc0iyqPCS z{&Ic-eGIu;_n}aWTkD(Z)K#;8sduPAx5M}_?Vb4E=yTN7vzm(RyUoozTiac?AEGzJ z-j|l=$6Wi{Myn;M9YLzm=_4CMnCH_!Z9A}faITGA=Oqcho=`_)OIj(RF~(m^dzv4h z5N1HfP|eo+pV?s_($fvKk3ukY5W+pkFFkvZCE=*Z@*n)(o|U8r`S?)tkj~4$F$_(Y zoBGW4ejp%#X2oAoWW^uS6X0JsDq4TFGc0ach0z!1N(EQk)uPALnFREt`ab9heQM3; zDag;f(Urk@@&fu^U`cWJr3B8727$UUKV_Et#B@~^0UiVg+(p9Qcl#^UUD++Ww@8Dx z4NFeDlGaAs1Ar1W95uAC-OuW!q7pY|3^Kk|x~UML{F_gD7k@R=0R+E^h7lhuK?MyK zaKLkjqw*H1{($B|7+0@LmV${p8>+~VCIE^X4)FhU^RO>3Af-nshcgei^+wF=q3PG57Ciqmu0{qNEV=Edx$VR`L#y$-M%;*z1cDhnH zcE(Mg5#UP3Tp0)#qxK_V4k9cqEU#QYX96Y*XT+iC@6qP_zvImJhe#nyrlIsBp?lH9 zYJuFi(eYRBD?CcDVp;eFF7PZ3^{H5U{>dfEhh{ ziA&k0^z0xBbgk|^CciH2Cql}<(ORImn|z;h5iBf6D6|vNp6>x>5ei2|X6-VrSNI(d 
zwQ6bvumZFccf!QP^@MVy?I$NM=;>zmT0bUh05~wcj2NrRER!Mz;L!YiJq1OjS_@G( zU`q!;V8i>A;0Z^mY=WbNg3$2`V))OY zRaaS=Ju?;}!N6k`fO66TUi#lKc>xq|79UtnIL|@7TH!YYr)h;A2kfMhMzE`1#cP&D ztg4C%9NJWQryJn^b0c)vZWUp2P=nt-PWyk7AxeRrg#vfhv#_<4oz1o7+9sVq;JT;d z=_m>tGY?gcJz8QzzyEFmNg2AB*VXI7yH3mEeZ2blQ>tu6h_ z%-#S}ErMQTa>ECw3CQCIE=u*;f?_bkJOm$1l#S~D>cEP0Mv~ECU78UZEDr+HBVb|@ zh7@{@ns!9kc&zf(q#F>Trek^{a2F#MUi=cUMGo4r-HG!?IZ&KW2tuwp<2N91`dpDkyMl}HDb3N+hUtibwG{7#s54j+*D)2;=<*3ksO{BBrg zW<*QT#R%O{VC2V;%SBJv1!3crp&;It1J3*7-g%un!S~K9?zDd~;6}72Eq2O;v9h!# zz4XfoJ133O&*Z6nO~xDuWGZP~=|$US{uq4pzGa%Pil|HFjigwmb#^`G8eYBd@aeVc z{&vptR7YotUv!S)Exw4)qa2aWk|%e;LCf{OetqG16nZD^rQ}iomytAf=SPG0oB8jE z&d*eOd=|l0r0t%j1#~|{;WFsYn`AAr)&b1xCE~MWxBT7S@FB4dYX@d4AG}=-sbr)9F;RXPwV{) zV|+j$W<(Vl<66DMtO-$5f1|K&JlaP-TKa%@8uS4W!0Y6^_ysWJP-4(!y*Gw$7xeCnXC@Dv*S3Y5k>s`{iK z3D-K>iw1mjr1EsX{;O_vq;OB*@ z1T~!&ti6EWqX$69s8F94_MTkyxTT@@Q7!C!=$o5}s0*`NT5)Zh#XR^!ESZK80C`MB zY|Y=QTH9aS1U~8{|LC-Gy<}enIKB!N)&w0i>?}PDS%T)!l=Ueg0gk45?;K96Y**g$ z!b$*S7}X4a@2v7-4O$uUqR`{`l?&Ws>0|Q2zVU_o#t&#oh(FarNyy(Iv?A9fYyUX- z3uPQdCA724ecv2w<3N5weW$(@rUOa8>_E?G_;q;w8IeS9wL4 zM$kuY9PCgkeAXM$EK-)DyD20HA!|T(2orN)mD{{mFri7vfTWaziHsCf3X~@%6&w_Y zmBBbmM2tw2ESmjLvE>QJ<{X7%hhj04z|$N+-0{T;o8OL-Du&U_@WXt*AkwObgnNRQ zCtbrIWUO)!eO=3RSMNWv5!A(>45y@F_feHse6nP3r!ftT_!$Eb0+xoZTxM+Fd!8~Nsd-Dp*)?OXM(?KtB zGo4M^TXoS-)tnrPQZ|R)u*EZ_f#}|JiFcyeR*o186 zii5X0;KhkI1_p%#2P*|w35b;bBLX76-(m&l7?{@4NF_~#!S|1oq;C=a(7QeXi~Zh=p?fpJT78bYwF5M6MIQkNmay{*&~pBkiBQK!SPJc>wG^5p z8X-#3`&S}!^R0R|$b=>Y>vE3(K%m9f0#|R>ze_4Pq{}El*BlgFSHs-evcyWXo{KyJ zaf>IS0b2%Z-nx6-+r!+X!oUo6;xEEpl5C-Y#WVYtAWJYiu|l50h*#dusJgvu`6;Oc z=JXJI(8bdU=rZ4QP~hpDLZWct;yNP680#Y90esIa2D)*|M;{5>+!Upg^S6(3=Lf-_OmZ(*pBsV z%yx#)f0XMVnQ%>J+SAeQx2Qg2!_&-Hx8`=wU}2Aa=0~uZnqx~F{hUJl$_E&F%-!1s$$+CkSF9@r*UUt`o=5p=cy-?_?Fl|TF{!ORMOnx zn3Zdj79D@Xw3md|wjd}0d7HJE81R9Xtn3hoINJM5hdE0I#xAzHF!+)}4BQwCxZm9t zIg6g+CoaywrDo39M2>KV8%w;l%ZZM|W}g5yrYHbx%)7;xI2Tu$8ef9VYC|nWBgb%U 
zosBJOCaiQxkMs5EA4*9yQWU8U13X1JbY+q*B2^L;lG7ezE#3H25UIg1At~N6dSuxBc2V=0I9v!qUc++y?fso9&Iu13&&?!`{!x`CQ84r7t4HPBtVdtLX> zEgQD;!fe)~KljK>V25dXSy*3Vlzm!cqA=c`^t+45HH%3s#XNc1_>QpItXE(4S&aDJ zk!{HD_`~d~_f17#uIuN%c1Tk#%%6;y4P}1uuYS2YmSfAj5aTzy$yPiNjRF@SqGJZ! z+;4u`DlC4b3O|LeDL!sr>F~}KDIoYPqPV+*+ST!)auZ& zZ8ke7B>nZq=F zx`G9vYG7pLxa z{VBW3avOQlS(_!E7Z-G=Eol78XPbJ+eUB})0PYL-&!!g3--vU?f^^M7X=)0>~qK6M`;?k7cx;DJa-_K@eJ_Y!CzgY1y&j6-J44~G|W2{Kx62jZDeRA@86F*Ly=s- z#q$ys+Y;9P(v`Gtm|xzyXL+Esw`$wMUl{g_+*P}MkkBpoDt~r}X}MN7RM9!!$tch5 z>KgOL!LMv|`y`Hn8q%!xZuQt5}tB&-UJ zjK7jB4%oeUo#4|P*sdqgxyfdixS&+Lwn!hY+X2F2TS-yw?Ew20Qlr zc<9GO4o>NRU*>x`p^X0jb@wz#E(;Y zu~E3YJh~F`o)rxkPk$~n%Kqfv1f#goqLB{euM8UifqnD0=+tm+sXwq3202O261m7} zMbv(GlP)j0#1>A#=bT8x?_&`}Qy5=nDP-WNceARm?qy0A<1jIDjIFcJ;3zO#i}vf3 zFUWT}WZSwMC1niSD3WF5+i-TYh_yI&N z{5TR!qMt2Yu=E4*Lso_f*EfH#@yTPRIw)b9kx-)~Q`dJdP=~k_mJe@%8qlb5lG(f> z@zs4RD>M>a9I*RcpfVE>wLpAX2(|SGUcq1v@h}NT7x-aq4jJS25?jo+F`j6?Rc3Cq z%3#iGxIiPesx3B%QqylC96S}(i9a2B`+q`Z9J^V1qYL~qTV3axO!OltLln}@ zejdMdNhVv_OV;1HZO+ zVnRVk9`n2R1Hd@RRD6O|z>*eI`M|jk=BrUr=Ns;B;{Qum!Im0J@dIY1(fB5c<9TYV z2Txx~wf#_~OK@=*dB7i0x}Iid%^^sI?y{DO45qeGWQqhfl5Z&7FsU+$;G>;FfET{` zY^8R3CZD;gl1z%@R(xj4mpqQFN3ZR54ts4hHPJ5i1aPsZ#~Y~n?*=9%szlOQy8oWM zB{P32IGYjyct*2iQt8c8Mha?!4H$J4p+0kuqoSg(d>~WgyoBH+o|y}rJ>uS0Nv)zA zDtdX$mu?UHi_!o*YB<&nV z-B>$Cm{hWGOGcS5Rrp7DUh698Pv>8 zalV{5sGnOo-cK0W^0wLR<^NqGOx?y2pp;079=9UjH5qIz^(YPf#%++PW4kL&;~Sbb z3K{J?+D2##U%{uw8xey5Yn!6%D7H%Ebk4p)qCY`1wZa}diyu(E9R}b|crsdFsCl!4 zi=6f9jCk1|jXZzEw_USO6k;R#Nbaz!m+q(wydiBo`NFt|A1i!08LrF`2o6!`0h8xP z>~0dsbAB?j*REOFr{4(NU6bvK!o`G;x8m2Z_J&Bn$n%4DpPZ@w`)o~QIf#5oerP` zar97V(ZCWer)d{RG(TJ4;>qArLhPV%6rH4h8XhKuLd)cMbmgXzK8UL+IMr%+PE#xU zW}HqQgWN$0!|WU;3Ff-}3v9x@ zd6I#6&lm@Q6%l{fGBx5MYOg5j1th1lcgG7jl)Jbj+Nc>4P=h|cK!hMMadAMYf(;6V z$TLOfl2#;gyNGknz0^dZR(%pKIg!uYa$qZibdHP@%W=NZP?Vo!M4geWM(TugYRkv2 zjJdQZyx4G6hRCS^IC20Xo}5+&O3n^bi10-mvL0VB2f|H(>sHBNzKCi7mUBcQVU zUZuOB5|x^{Cr`o~mnM!vhLIYX`wSZ<7TzqWm34!cCoik3*fS?FjFIZL5>9JkjjbWe 
zg%tg$F~o*(H0n`%zNuv1>5|+#dGrr%L-)sIZxY$v>2V#kuCT( z;^Z+VrrCLCXb^v?RdW$h*Q|s7fJ{bM^ux-zUFPwV1v3_jNOA<6vuVhEEEptg%E5j9 z1@x%w9i8uTZBS+lWIoY&OKa=Hjesv4#3Jw=9Y7NhD0B1Dm;GfrwUzkJ03ggoU)R?4 zx|FhSq`qr^rk_M}!w3N)dE~z$d2uh`@Ml)*!k<@`7~*MhNDs}fO-)#e3rMX2%ohO6 z;bHAP@zN`KT#lC$mm)<;_bg=sQOegUWitt38}lt6FJw^4Ua5n;kxUA>9!{;UYgeVP zKp;XkFGWJ)Y9N&?PSUv&4ohouu~Zsp4$XaNP?;CU&+z#oj(%N=RM^P)&vy;dSHNWa zoAT+za^IrpF%@Gi;Y4E_N74JM|GBDzK1Pl~K{+RtT-j$#Ch+cM8ctzF(Sra&C=Wtn zc^c)E$T1Qq?#-YEDKsdX3RXB$OCe)!eTTYA#A+xBvW@i!*8~iO%Hqy01d-u4@tMR} zpACiNt?Cykv5yT}Xbz-(gq`36Pz-;h+=~`IH94dkJyzSIvcq*cbtdGB*$lKE9E~ih z9z2JlVz652@larhe?eS8ovtocH3umQPCXBlTv}cH^vIipdNr3e^K}CjlOUSRI7IE8 zZ|3NfNp4)sTGx?OuOYVJ8tD|u(GMH-!D)iaco&CX56Li3ZBQLx4*g*o*BYAPAz_Zw$n7n9_ z!`FU3`SOPervrcep;9REcUz@LW|Kr0%XBep`Qb8I9(5b<@n5kV%zV3!$R&?f7;3Rd zInS%5C1NaF!igy}#?wTAi@RWP+OM8N*=7^+CGsY(ZMERE0#}Zb2{@JOX*hjQ(kK%) zsvxbxqM1@-#R!~`l~Pc?r>MM~211+qetIBGDa0%;g0~S&Y9p_nrxuQr`R+>fgL5Tm zhu5iD2qV|Gp++2SS7!CwSF*-w<>MWyV2bq?lI)JWqmM|a0k1$Z%V8& z%jCpuwW^t7aH16&3@IbZtcubKyWZ9DV+e?oIr0)|BHXsZ&#keV2hJLZNm~k=mT)U^ zoe`q%Id7~WgXk|kAJRyGyrG@}KUTAxiS2)qgu{XLykmLO5`9cFkoh3c;gF!XNCPJi zBS#cXgMD;u1Ba95 z%ee|9D=0Zi_x&;hy&-2Oww&jI5xATJslK|gYh8k;GK1TGQ6J1WXYqF*r-%CEddg2o z%{48)*0>1#19Kf(34EvG6>iOEAkjf<>1{PA-zRr4rBY`u+Q zkypQ=_|j+a9H|4DL--~kArU6`7r-G`g0(aJL2-kyK9`?9>E z6>9+~fLqDIhla*hToc@4TRwW|IZu8^%V)ikPRH#`tA`kj#|1$?6$^q+oxAa+W?LmTDRqRpj=z8nsN(aQu;WtVUFG7aM0HS z4GlMmqU#fv!_y{TdCDLi)nsfaZAf0*i#_$07hnVS|F8ipPAW8mHOm{DLr`(gTAo@K zH2?RGB__a4+|3@i33F_Mav{D1X9DYLpmJH08q0=+)(0TI4u-hyRFrE{OD6el8K40S zhFp@fbdm_*3qKF|I?e~3q_jTt63V$b^rC7(nWNV+Eo9XI=yBL8rBX>qf0psRFiCqe zq!T_O2k#|)X_ZA11=QFKt?xmeV>9TP8Vl^x*(6&&qA)pkE)^pBJ@ilj>r-FP@sEn#ecw@vZQdy`Hkk774MW&1@`-(@VD`;V)K|bxMd6s@PfK4Z$e9&3Xhh< zBdNKsetYjR9l0l|_w?n9S~oQ!|7j&?W{f{QrbaVUwV%u=ZEW4dT{vhbRR?+zmH*WF zgb%%r8KW$#Li!vo_x>xLXF#7J%M@rMJTaKdV|5F0=}25$wdmesVf6pzZ4o(S8X~wS zs#-YRsHlLVC*+p-PilDQ8#;~UkaW|s>FN$|-~y%Gbfo|ZMB*kD%ZhX|%HKVb1^9#jFg#A5DGE^49(rBmCiOK9cXfc#=Q4GLwXHc$j=19cau!o)IAwHvP0!JXr$ 
zljthb&d&X&Y<4@C;fc{Z!I>boJQ<=eR6;%!hEhTn@ewfyAFn=}J{2pDws4z$|127c zX(jx~KMk340HE>7^k;)YhacY$AStKjzoh49eA^)p78QRS;Z!8pZbgWR4x={6WfcV1 zZ+vH`pw4$FOa&_^al~Gcu%k#Az+V0anCTSZM>#k?=N*uO_b=d){7wUG64waCmV-W+ z+J%_O2H;1&6{)=Jl++Rjl`&j;J>ud@KndJTcAJa0&NMXu3R!1jnm&?hBc=TLQCDw7 zi%?WVCyac~L(u*^1xli&Jwwo=*Hc+m=KNUn45DGS=GCmE6gbJI6ZDeTm%7xi>)?`s z{M=31B<4`LILKQS&)E3ii2zM41;(#+sSV6A8(x4Yq6p2Q0}J_LiJ<`5@o^i@dSzA3 z(HkmuunO|EBh)Fl*`d|;4VMhy=WY5;Yy-6_dQUJrC;h?OoWb#p4VJ#Cw0db03lL?z zPS$CO_$uIu(+;lv(@+_;?p(=**|*N{g1C&E)3-m%;6V{ux%;Hwy-iC-AS>HLzQwVo zl2oUXHduHaQ*Q`aL@b>1<6@4F`T)mFYPKYojc78x9>VjFqqC(hI4U z=LdZh`ll>2y{~{7lO_-iK*xb;3zTf_suusDbjI=~t>}NMQl5z39?u^n|_Iuhi zLY@Dt0-x=3s?7_r`%VSUKdXqA>bX>NNQX%r{DhQi@|Ut)e}$-^tt>5ZtJOf%Qh8q?DQuo*LAI>VNZwka;zm1uiye7Ju>08nqg{Bgp2Kc9D!Tk88;4FhI&dO#twHfe zLfvbpwq5*adZ`no8V1fU8_%6?d|OKOeD%Yhe5b6uNqgK^n0r?ZM-9_p(d}%aR-$Da z4lX)L`JJ3q^KEtSTu5 zb;O@)`T)s)^9P&?_ib*XRr$S&e`I zqdq37L)gZBePas)Kl0!66`=CN>8B&Tk`h(Gah#5ez-iCUcqV=F>H}w86sq6wegi!Z zDbPUp7xNjvWc4g8UHVf7m=mQFhRdO%RoA{GMzBv)F+T7o*F9{g|KIUFvA{v99SorqF~$bPq$uZJk&yX#ecY{l}X`jZU52&9(kpNg@5K&*4D; z`2|aB&Gt-0jtDryzt-jqFnBBz#dw$~9d2~z$6Slsu34MLrS@bWbiLGUh5l}0US0w^ zfEEq3BRpkT(oOuI!_tACL%Olj;Y6AP9=AAp*jBu0U==w7BL|(E{3|m!vFGg7X@6Sl z!y`=pY=>8K{UiZNzU)`8_61nSV&{q&(3Bv_rodFkA?ZJYk{Yc4&uwssMx>CCMn$|g z^Kq*#qZgUjrqzJgl`9{1tv?_Y0vnDrizBau%_gjbsKdlJKCyls_g#s}m!RsE)WX73 zhd`xzJLyQZ%&-&#aRTzWk7o?mM{3`Q+RADmNU8pPvi>-}*ctrbdp{I9R2=U)5LXn9 z{of*S5y76Pl`Byd0G#1hyv~{5CVj!cC1?NFB~M6Hb!&fPloA)Ah4WCHAXrFA^ACe1 zgIApBNB@|xHGO?>d!XXT{UUMvu++vX%(fEL`$ja^QbxM8x(6XVjP~92e*)pLQ0PBE zlIs|VToH;)-G3!F^$*E0*`p3x(0m0`q2%6c5LEIQ`yQ#Wq$@H)Hiqx!^GY6mx<$5(@fD1vm!X=M?7@2Q8mQDT z5(LJYl%32f9@Ugf*QX`~yM+)=De%eTeRoj8GfM*pn3z%p_o$^qRA@AM_R*!&A^4Pf zmwAFf^|A6_)rTAlF46sJ)38=N+n)^?6(GiZlHJW?B~ytB$mpJlVbgCjLo{ZO>$<2& z2$z;CoRih?JP;q+|&-|-DW%UCi^3-HX<9yIR5CM1=bbmc9cA!N84?8GTl zZU$UED*|UkJL;W0v|$s zD8BxUo#`+)3G1lbkuI1d<*1b+)u$FPf08FEcAqEu3S%hlL!LYJoBQflLHz9jkstR7 
z0jrcAGQZ_w)2V~W0AUxnI0UA2uMY${F8IRlD>u>gT_`j)DB#HLiS`{_+uX%NPJ7iDsb9(wT}w6D6Se$;M?;n<*{*K5az-Q!$sDI*&0B=Yb zr``8$S-W%E9?H)bq2whOT)ZPXh8w$9J3yf+mHwXsRF37J@~6}ss4LmG>IHEIJ37UU zIh%AXIMJ9-hBsIMRUXorrHvab?Mnw)l~x80=xsW(kPC8q$7OWu9V*~t%PfHxE};5k zeq{g<(fh_K1^IQ*Up?xOHKW$v5Lc1pq2Z8DC^=AWxmQ>Mt=WKl@ zJIfgf&cU$%5jotQVvCW`01E5a3qBh%^0&$-o)a6Hn2OZ>jRo2b5>fZh#`6AK#)Ym_ z9_9sgMtgEx+VjRW`VmhYN6dU#(@9_8|4|%y>gAM3D%w1#ko(g-M9uEA6U+0t@ik`l z2&cqknFU6ChF9~{H)K;+K2ePw@e4QhgpvhJ&=Xpj#^A*Y|dFor4^Kvk}EtIulV zwPYeC6It}oLx1H+|Dg%#^1;lzV%;0@c~+qM zefQy0W}Q?}pdd4N_xUQy)#p_!Ys~8+M$p@7m>qb{d`{u(pmf&L=4tbX zgfF88e5nxl&Ck&xF+Y&Y9dGYOA$~k!VB6=ZUk^fVN91Mhj)+L#uL>kU^ee{KR<9OP;1adWn!)p~y%TD-_N+OQ@_ft~o5PM?3auIh zX{Kq=yPm15wzPBvP(M+kXqU)-tbg-D3Z)5S)dA@HisdD|Q~k(axSeE#VKoPcAfssC ze%AdkR43xVnb^gG(WJAg*;33Qhvk{GB=zd1mV+?@xjo9jEhaQ#*~wKyF%Ob%}9Kl{MpUe-Egiyynhe0Q(LdenY;_ z)Mo2I>Z5UcgkF#)YAR0ET6dZl#YkaL0K2c=u|SE-baGKlzXtW8)vVH?5=EWB8%1dy zcoY1}@pQIZ5Or~Fnj>KkmH9kLj45`eXeIUp0d->QU*-fJ&;-q3{f@EO(dd-tAxRgjwlXOuEo*WrZj_ zO!Pd`D%j=YA3`S7N6gkm6$4Wpv#H+}NRr0M?GtpoS!-K;pwm=~sSOMLRo@+qx)=R~w8oPq{ByM_6`w7$`;6b1pVDAeedNC|ee}f0SRs~}v9nmt+A+9CaMS{N zY=#Zpzik>EK(f1n?qd9$`qr>0B5ZH#wP@7PAcA`G&YNd`r8Z*qv)7b8_(b3C64UOI z`uQBzIbK8NovhvEjg6Bi%(g3@1T6PrESdpgPDgt5lYiJ`bEN6hX0z`Xe>~YSPpITk zLi|KY^qLVLM-=&akTQ$|Jk0#8Zf@~i)un);JFRUDvz*mI-xXi$0nOB#R%05QNydW} zzeV5n%&Gl%?^Y@s2aVl?hpkqn|2ialE?`wzsD4=YFl!~J#@;%}kgHG#IC$o`m67%w zuWzAYCvBlG{e5dHrA?n+?q)5F8< zHQd_o?(aBt!{rk%H5hx{m!ImECDONwkjdHrzYN2Br;SNEmxVuVW8Fc>$-y|5 z`8iMd$^;qle1?_WRST-+QvXm?DvnH!tY*{jAGQUyrqD(l6y#M>U*pdzLHiOSbt^Ce zo!`7zd!HrQTY01250)XYssbe0sm@pVY$g@(uA$n|XUPGwu)i>$u8#xIn*PX+&Zsca zn6$KotH{shB>TrAhUFp8uB0_ZBe&Z+cyaBY$tJFSQV81BRwbJhUd#DD5iCp$TQo8* zY^An;7J(PTT+O=hrtb7PntUeBavNnq&Z?~hV&nPkSwq)uV@6d)^dVm>D;B+DYcl2+ zLrZD*3jDtD)AGy@GZQ49SEl`orziX>oZosr*lt7Qms=_JVzE`CrZDrc!QWJsR90?lFU!8` zqfJ*{fk@y03%9tdo~-&1bw#w9z9cB~GeM?8@p*^)e7l09Kb$)8an#?}M{a3sZxVO( zBF_XKPvCHX-TX_%ErdPrS~ekk*OAi{m6?yDZrBf(_#3>#95R(*1bUS7QIYcSw$%oN 
zXJo`2iB0=(LtIxyIM-CZQM^J(a$8F&Gas4G_$xi57$2cSs?2N_00pn&+7`vhde3=Za%W6X4zY_G+GoUL;jT1i0`(f4ITxNm{B6>mIaBtn2zbV ziuSan|F)-jB}9QPj7-NgtsjXx)+I<0HrfxTh+-bUeNp$DLUng_!dk#oJIOwlX-{1w zL*i*0RpVyDlbw1#N6S?k0yOtDRAks;+q9sa>Rss2{$6eWZPcfg;Fm!M{1q#OqT+0y zqMj(48ObKr5q-?xI&`-q9_}#*n{m<%z6h1&h7|WRq#J)8jk+z&J&ffsd!fa06N!s- z-d_gGCdZ3E@+486rM_Z#S8kb2ykw+AJUn;no%*>e9Y%wmc&nXWBW%uRc5uO;5$MNN z%b{b1Q80`}ptv08WEv%#Y;I{rO;S9ZjOxeB%6?;#B5y!7d61;I=EsYkY83lT<~GFA zu(RX99Xl=RmA~X;8Zrcv{fji@H>PW`=0sx#R9Ce)llr4U7Y5yiLha{^L2{EBY5r`g z=VieRqRGao2QCqg=E~mbO+CR_OPHiPo&HYE>@-^ZF(pL)8|wN8d+PmW$$~eyPw{1m z`_wyBBK+T?Q_zdJMC*Vem1_*B%)W`WI{$FlG5BSv72blWc2S_5a^fQq?leaq=lG*XW4jaiSWU5FBlCO^F#1d~C{3)AAEVzXg6t#a=HUe{xSeW+@i0>l%_K@3ibO<#uh> zI|*6!WtlJMN_O%wU-x|0oKE;#qPfAc*X6j{VOMR}h&;E%4~G3JT_mP>yf$@?jJVG@ zDK*z13;||yrP2UZIzqc%{j8SwtYxdJ6N=67idEu6@Aj%yg5-~tmqh*b#D6BL8l1(d zwJIV%#=lBbdxaUQ8kz$!hA)IMbRxCVp+PaXM{29FRF z1-X?+ea|Ab!(Pmlp)@y^ z#LOtF@tfk=PwGNMW0;Rs3$X^)A)~q+z+Xr$>EdMBvxswJ&FVsr@{RM}$Y1L!uJm&{ zkD*2TEdRhIaoka43Y^zGCRu+P*rBAW+5<1OS%|ptaT^-xg)Q5 znl1p5HPSN=Qo@Myc+!J!J*n0R(SCf+Zrn(p~c0EgKXA$CDO4hx*D`>)d*6uFU zb8186JcK>H*IDluKit!cAV0tua&*Jl>yG7#ZhVqha~vqJqGi2&E6pugG&2+&B9IS6 z4Kod?$E;6w+L*PCXDLM^5V=nsN?Q+vTnRH#yXO=8vvHi{^sm|2>kP8Q{;C}RA=>W2 zIkQRdVO3!CLYLU&RwAfnEp~cWK!_S56#7*0=V^Wb)SSV*BHH#hAnJK$qp{%j+KZb& zxF8dMo;=6v6u3VF5#)W@!-pcB4itKvR6+%|M0Ar#XpYr`9#APKFv~kL4KodY<d+Yk@M^kSbjY!D20Qgxxl!q9{f9a_L??B_%Do1jco zMWjk?!mwfoanZ#Tm3~|x>9mFA)}*E)yj4MQiWaqSpj=#zLGeZrxu>|Hz>~S5NwIXi zM`e}z1$Oa94@&d~#;-C=FSu5BH24plih0BOpqZ$Kspeo567PW=As5Gftb?^Flm#I& z?~iXw?SkThI-8F~YNuw@A9$uv6gL~JePQN6&3kl2cMFP@{Wu0zkdO1#7T9ry@ryfl zj?&#&cUGLkmkhZ+w6koQBJ~5EYJ9hmu1AYFFWx`wYNkI--y-K9vv!y3@vD8rrk!yN z@C<)K&Oc=9o|hU{JDB}AUt*C2LYpkfvB*FqQ@hV3h{b>f^~bEwStZ&rnq^s&KdT`v zuEiZobqM|^WtMw%P)0_#K=nDg!phwdsVJWWm!pF{p`rA>mv9O`Q#)m8?PMyDD;nq$ zhLS|A)MS-AWl5=4|MS+9_%ST1YEq0rSt4+Qk-3;Hii}Qx7Oapt1&?HW%yx>;sR7@y zFq0T1X}+6mFH&%HUy-y+v=l#yOJX=!kF3k3HR*qbTdqh-Nuw}6BiiD+=}(ekH)O

OhL|^RUX?C=ZFsW&5b7?-nu%Lr5*kV zu?L=%JOF)xGP)x6nZT69_DYP*WjARuMKPuF#BihaqsuNro?oxVF+}l0sv;TVKXtbo z&?dFL(hpMlp(ti9)C0B!%mSMI`lRTgqkb|&(G<}Qt-wq54w-X5fq{{iUv1%gHhZvt z1_K#~;+sTez#bZyS5w3#6U+~rHj^z9tT2w7kli-+ea136G?>c~N&WD5TccvOGT`a? zO0min8MP*8vW&wOaz1C#yDoKgC!4uGnfFo=J~S({^I7?dhD+1u#@Z3R^WW{%K6Zq%pNjRt9kx^ahX*%WgitjLa<5yS7C%+^i)pJCwOrccBW2B&Vx1i2&xCTg- zt`PB$Jxtbv?-ai~TC8mfrUSq*G20?p)&kNi=soahrX_lQW_gIfJVm^yE+J#T(o~UQ zlH(6(@H0o9MFzz_v-PL+WQw>kuw+!g5^=kE%`=(_x0mk8zEEAKVEU}3#?zf=#3I{4 zXs4htTSG{vZ{rl?TzqTthJ`g)8haFFUTPEV$t^#T18(P3!c-pRfM9hcBQ$q)so6q3 zDoydQzj%KNr``95ok-~rD50GaI)f|Q<0FiYQ!&!M;092}_q?=}7LaSee;szQ7y_1@ zSEIQ9A$hh42M!(bWtRAAKUhrE+LSJVz>xSXfODEAF1GWP)dUk9Jy>m)@{C4*pEgn+ zYuDJ9MoGM7KD0bJR!hVmJcKT|9Ri)=eJ4*PU*q~Buw!@dqbPwW{KNf>@BzU`xZ;&M zi&s7?+p-gU7;cxb?*uV?iNyVd7aYgyb+VqiGHVvV(wwAvl@}Q~!pmErIlHe;{LkSRLKJ~ss%-Gz7%3#{`3`lzve~quB z7QkfHss)>nWMV+2`-sEGBgR0J*f|COi48WMbTiubq>GpClh!oQ6E=)|VO@tX3)I4M3ggn#OAEhgBQX5B z;TOV?pkmIlkMGA{sCsQtzI)o)NgxWiVTuQ4@$h%(2eu7HR2-wyRopj9;H`BdM?)4P#gL;i=IFPaSlgoLfo>8tCdr9jhAW_8^ zwY0OXYi-$Hy|?hNG+x3%Mp9&j>ep-OCEP1gNDR1WePqpxx4;Y7)N0;}!BAwCU}%@> zfkm?{@cObjI>9(fgG7Eg3N)#WB)Bd8fr!sPO;)fBD=bTv=Qjs(u%}HPQXnGEmgN;T z%KlWyT2x30i6&3H1@@N1)iw$X_J{pSH$+#3U>jQ@oY;Ik6#qS(#T)B{Qi;kHBEtJd04Lf7wnsdLf{}_t3S*B)Q8`T zKOwtXvy$Ii3d>0Sc{KS?fjpvS^D&$kpMuV)$oGU`A5H)@(Xt$+>9Slr#>nrb$MVJn z8uYxLKhmee?3Q{BM-m~UQA!Fw2>WhQTah^JLq7-R{t*Gv8%(&A0PK_z4(A1$#6>D; zAkS|~%mX|BZ=z<)9=+szsP6=+7}yj|40#kb*uu@I3q?o4e)XGZ@!%M!M(`K9RE-)7 z9~)TRW7P>0HrxsaP`4?@u$+9bWvC{sM>V-NU9OKn9u%cU6+(47N%P*Eh(M`BBX|o$ zq7HmGED7lG{JoW<&5&ykEOI_tlFw*Pe z;nBEn@y=jken7CS@b9G>w2OlLE5ZnvWiA-uFMQvkEy~F7`Iqz&v2kmg?c+Yfq5GY- zn{cdY^Q}5W*0t{@upK~BU0Atl}F>yr6( zDM^0uwq!KwIvNGx(7|B7uoT_m+k?J|nGZ&_{hp;f`I~JqM*F?Dl&mAc@*(v#+q+y` z9YW>{Cyx_U0b$J7ZA2vI3#gupiKU3gEvq0@VFlLoWd93J(TjLen&$4`aKl|Z%fbF? 
zw|XbXuL_GZ!kyU7IPXwB_z{jhw7TI8zsOF!;`#Bv*nZP6Ne=cW+8bbh9&~<#&xD6c zC%V3171=-Oe98EyExwZY45CTj z%~#f_OtHPL>QUizs)e~5Ax__eAJSdv1T(>a2W6}5(al^pwt21}mYje$^Q#`NdrkOT zla+fwC-H_F{kKk*4btG-$=y-CC!C(2f`2_FochQjwr6k0iJ{+?B?z0I0h4_l?^q3P zB)EpQb?Ts$c#Fn-Wziq{{(7Sp!A2|bc6YxrVXcjS#0|k22^!f?>(7o0CkXAoM2*-6 zD5N;c&?MOUbF+f9+wey8GtaQ%V%Qe`IGXdYu7EW`ioT@?Pt2~KLcE(An`}SD^jtM4 zIsu0Pxd-@Uhn;e)E*9~*u*^sm8QRtFl?G)H;#x4|1}D&S*l5&9k14k)1B1-vqrEAD z1Ov(GWK9~>uv~*s^-tgT)^0YYZ&7GQY|k(zZb#aqP!Svk?~RIHVmN9j)J$$e`;Qjk z^62`GP$TAyiL}bx4sZe|mn&H&c%n6$X<+F1wCSuV)^PoyFL|{iZA4B`S)j#rG@KQ` zM-)jrzG!{!tj>60U+!hGtcVP%fZ+NY2YP~s#|Jngn7w6jl8~u(CG^mI9DeNk)iC@u zu{$x84~B4z2kEGG1{4yNSLdH75$9)9NWW=Gb;i)hklIkbwSE@(O?4LoOG;&KMxwT3 z8OlJ8nV6J<|FA(SEp$9kg-4EMNS_Gl>Q1*>+xyFI%!ZCOz&S7n)jB>BI2p?VgwW{x z4d09JI^3N&tpFkMCa9nm6!et{F>MCQ%#>(Dj;^y>MIfClAXLQ%x`2m$LZ!yk;_I#J z5fCe&8@uv*X!CbaKMmK(PC*Kc679q(;;vUL0iDha>A|mM)w&!_)QDoYEbcYxXP~sA zM({g99rUe0H*9U>_Y4%>fe&cdT)4XzFmdFH5Zvz1{t(2XfBs7_T7%~zoU>f3G=F_$ z(vxyg?8%Px>RtZ&5P?k>-lU&&;1@evjx`!Ydu^o7S+NUvaJw`^I|z^pqT7Nz=L3oP zu0cf^CehEKP0bTDynd3&?|$KhO~4xJL}K&B;7jvbb~*P+B)j*x*TR8$N5gqe^a*Fy zEu&|$xEzi8*pZljdy*w@MJq+IgsPmJHnY#kT^6WB0ick^+8|hs9MzCs8`32M?odL> z?*!umc3{AzRE}yWf(N|&()A%whDbV8eQU(B*DtDV+XV4BlzMwHU#w%Kds^23PmA5J zC8iNSRZOx|O?Txrno}v$OeSE67BWHdr7DMXP!Ca-V4oOLNmAj^+5OeE+6jKuqiO4O zW6c&T*s-E9{~2!>tw<(#*P-(R2pF# z3pH!KO1)mhF)e9e`{cL^m7a{Ci5V$)s0$DbWim0yIuey+_#gLNU0rtX%O+@swQCsl z9$$~@=p2BwTkK~OjZ+I&VihCBM4`iv1wdDHvnL4XCx5jaC-ZJ~|N4msH-lY8V4eFxGyN`NL*&#B{h&Ny7j-98?ScLw{;Ldln4+*D=%O zgp=b+0z)D3qqs7{+tze|=X8qkE@_J9f{sL|o4J;D&RloSjNr*9atcFjiA;s4?_v_2 zwGjg8`MADC4!d?cbpYgdB>tAb9cO6s{&6YAxQ97<2!jbyvFHCGvC5>%!zPc2L1Kp? 
zdZ z%`h?mG(eY|p}x&fJTRO`@h6;?$bw=rGAYHRlZ2gBi_ZVHMZ=`pKad(Px(5Irc#Jvk z4BOBcRle;ShaV*~;%qW7^w!B@nBX3@6F;GRyU^0>b33r9AzoU>jwX36wMY^2G57TV zD7z^R;7SHxz(^~ReVcn|Yfyb3{ybgSep~xs)qQp03NQg|=v#gLQGiHp;WObK+A(3g z1-i1Auh4nl+52_hGSnum=1uzv@8h#&zYWg8C;dhjW6d2>o9+c2*v?*(JqTXrq$tcl zWSNR@Rr%9+A*%_%w|?doL+T8|B!Uf2F&Z4_wV+Dsp}j!We&FNW;K6TT^f}e3!beLD zCFbK#wTuZQMF@5$H?+}yK&f@8@4e+K?k!=dF^JdTnKjmRti=*1mWnYYHe2U4`SoVT z@%gUCLT^3*E~6YLWi-LVc;)!8L+QV!X}HMMO3zc&*B5c7_Zu3N_3%p1U1i#*x^!+k zK9p89ZOf^abuLuyjGsg(vr-ux$TQkX;ULZ3qm(~WLj5SzVqBO2A2?FcF+{9&QK zP%~4Icmw+(Y3(PiyvS|nh{F$9e!oGee+DZH4fZ`lB-wUiwToK6DqF}f41Yld(@7f8 zpdB3eCsB}nL3deohGWK_`QoPk0+~@az*Wx>BSSKs5P^*oB#g2@A&BVCvzV8HRj2D+ z0N(96vJ8^PiMnV|2?1Oi$B}+rmI-dj|I`-wna*z(QIcne54BelGi0V%O*pg`$tTqJ z_l$0$P&U$FrsG`zS%#Mej%vHz1K-b#S~KcB@e|Y{ai&WH)X0TpRhd3XxF;tDDg%2& zhB{df7uZPm`=RBel;VrR&b};uaq!wh#a11il$p)Qq6b4Gw72(>wpyjigiD2sX{0+? zHq}H=I@MCW8GTAqClXb(UM$fER)O0ypYjpRxy*VwMgvUN4YQhcncR`jolM921Y>-rI+_RrbaL*^=JRr zXbF(I;JhNUfa-z2Z#F$#T!Mr?7Dp>Hk#Rg=9vrp!*A#G%NceU&-RH$!e}Oa zW%cVd00bKonrKLVHXC)Dd-P-eyil<0Z4$%2b3^+}jy)1cpZYE}#GVYfC?|46It}(Q zbdwu15MZBhrcm}JDxTLw?ql`pE{-9|J+L1*Ai#_!s`0}>?}e?(jeCP;KPSU)Zly43 zd0l|oWTqB@+DAU8w#T|#4jA4^UQFs{m3HT=8$iR1u zk89W4tfxBIFpObs{H|Kd^@cEPJP- z)=?z>h+uuzC~XKd-ypQbCK=Tw?DxU*!o6lhSr@^{1WS&(&uK30G>8H<*=+j_phdjz z#kxeN+FDdp)(JE~1Gm(}^mqU)p87f^qwoF+=5wUIiST8`qjnkly zm^p>HxFu0cy-g@HFo~M^SCk_&GZMnmh`#Qz3_SR@Rt3B`Qyg|WW`h0vKn5=L`>v=b z4b-8gE%n*8xG4jB!p^_|q8}FUi#5(qq@#o}FR;A4eQ;&NO;h=4h|mOH15+3%S0_Ir z+!B&{zQo<_6CMl${SAfrJ+CPPTKsG3+Q6c)&L&)dU|+MJ+oH!8%7?*g!SnZj_rOaV zMIf8)_m#Cl+!*U3j%NZ(u0AE*#}V?-vY?mE(h^f=m#AkOhy@z>(EtVvy6@~-koIj z6F&D2tD??wV;1M(ekPeLN|L^GFSZPIE=tX}CzF+C&?9!W?D?ukVUkdb;u~Di@4O

6M-!!Fp9~+Q~;GF`2_x{h7*89++mIjh-80&dFiTze0y>KxbgXKH|SJW z2Lvqm*|P7(%VWJY`UH+=nx?3D89tz`x%$995}0=KX*~}t$5~*rvwT2Q$}p98K+=ROn3o__=xB1`(x=iMLscjnjN=zHm6gvo1XL z^?iz-@+SD^-AvXhbz$rFxMs%V6C-ZnNls5G{2-nhA6V7A1pE2~_T7MecpXLBvy(EO8AU|DXavfAC5Jt6N zO2?X8a^R?v$LipHx5y4Ar!N>)in+Eej-289XOY8TL>s1XR`=4${1pnFllRzIMJa2``|8$<4VPy zjV^|&*=k%jf@1ixU)rCor~Q}KY}vBt1gGW*0ZX*#qDD)YUsjW840%z!-oqJ2?GYr!ID$O z?R2%;!0~1KFMdtX6+W9fO$@EpjM}eU;_lWs2|n9e7`}L6P@L0b%s@axgx*FwQ=9-!1Ovrp;(hn2l~`~-fOIR;%rF?_q_b#XAb|Rfo1!h zG{u@t%KZh2*&5Fa<28l@oBfBS%GboSVH?+li#d6rdOdu0%y1}Rax~KtoNGIlI8VbUj72utR#qtFmmU9U(Qtc#Vyot-{XR}}w z)PLQJmafO(GXLBAgH98e*P5(hfTU*DI_%#_bXxoGq<|ebmt5j?pvnTwV&`JkOO#t( z=+Ps2+vc@TDdsW+&D8%@W%G9{<8NSLtOj zeg-N5am}la0b`CJi97TmQBX}o&|2N>dO-hp>etZORk4c#(00L+Wa-csF4ur)irh)n zcjwwlt4mbUq50PSRHcbJYFs|*v1sVHFtzjQV@kM-?b~hv3~|5(c5`T}k+zs>oA*Ac zYGW#BrzOi;lit@mvWAjXR??D)uY~Q$zwEk=Z1NM?%*i0=stNvyWAoP?uYMpjNrcyQ+hM%G zSy6KM#dpdD`c)dg}0#b!sD1)ZsM*f1LlV9b3jxh^=bEsafsO`#Rw$#j5;vj!o z#xj?pMZ>X3nA=6jboT7W+>?9hPJ6Qhv_XK=)2>K~zX;%j5Xh?&}8ui<%` z5zGtre_kKXll#)pS)DUQgI#Nr?<~;$oRcZ`U5ap^?VH*=eX2RP76&KJ>L)MD*wJiP z$9U3{s-e6%nl}3QR!hET!fh4W@^F5X4c$lKQ+p4utVvc;GaGN#ls z!=SBn9;lXLV`qyG5KWZ2)%{gLsQwMC}*-%Y*W%W3x$R7!k0FpN4!ufcU ztSW^M6m)0Bb>VWqZds=J{H1>hsvVX4C!%{YAJCR<82}|-Bj=k+C3Sx#kEV!S@BWj( zE=8qBLB}y+_!?rr1Xq+T-lgyz8}5)iMv)Cy&MpQkSy-flXL~(c5dxWa7GZ67eg%Gw z72pogigt6(U)Hg*G|r_@43YP4rko)$`pANom9}KeVE;3b;l~}B+0r77c9q#uaDo+3 z364Ecz?YmE5|N@`zb#@%eG8UILah0hT@Xitq=y<26YQEWN`nw3^j!4hZ?m_)y3NIk z7aWp=6!I_EZ=1hz!0Blbi_Wg{71!pLHF6l6vZ6-bJ4kN{;_!P^@Zo4X0-_pLm46~l z13%U|H1(S6caPU}vq)-#beZzRV1j!62wd_r5|Nc(;>NMPU)vYs#L4s5eiN76L#eR1 zDXriJ%(uBJQ6tlKS!z-t%XFpdr%;i><>@OEN1k?pm8_?r^zl#kB)1pkJ<=`K=M7Uh z^dEom2D|kO!XEUc>$^@&hHhBl!HDEf63;zAm9haoq%l1X}5(F`>CEOAOx!W(A1v=kz)#Z>@pB$u{gw$GrX~=mP zh-^kwqg;~^S}bUOlfU&gXS(7wdNwb|e59|$~fI?mdMQUAbYa^UEUbe z7={idHVQrO(rs3W(NCBkK6SumE|_Cel?hxTT2#ro${u^^28VXJI>dg*Y6IFn|7{duFZ=;gnc=Nw{)gS{il<<# zQDnPpik98HB*f*7B0i2qdTB;0ynj}!@9WNDDj%(i>EY5t#_vhy^<{o9j3b%d`JoyZ z5FZ4W;t7W0sonU$Qa 
zcmY)aIF(~QCw%9pe>9m#UGR8xxQK~OHYi~1MAG(YY_S&v*`|U-(h8E*I=gKej0HT< z9UbNlP3E*h)P$CIJk(SkDe|ut3(?hkSdy>cldm{QQox-^c0P@VB>fxVoOoLgFy1K^ zKXoaKJR6)n32J%d2i^0p;PbEW(ZzA4#cwMfRjjuEO8yz=UwVL9;!zoBqKwOgZdIm< zDk}K)=QVmobOK(mP<;ip#HWQag@r67xJVoi@k*2-D^rIOQMw?5#l-O1HYro%&m%?h zHE<&-R;*yQyduZ<5CL^7CFH|s%xR1CGb24L2U>}O{E!<-@XL$;2-N(;mZtwSi_hl;d!P|{2Z)$enyw)#H6kA$Bm-w9teVPYmN z$Ot3sMTh|1Gd;gNdd+zpTKvU%#z~soP%d<(OV*}JWr#2#I8SZpdrnwh*=1_>yU>&| zSS0hhhDe)!`9Nd5=OR3fvE>GVq6?i@Rg#{gPO+0}0}+{tu&sB>tU)GIP`d(zQH#NA z(KpaFJ_sFLC>mwzrbt^n0hxyE8VveA?^_@KJ-52}&=%^@R%C6ZUD?Tdu?Htb|2OSY(OLYoipQ*iA#BMU}mXvh) z)HIN$yjv)}SjfBv%Q1g81zU6^Ug4;H-7pwhK^Opi{F?(-f5JOXh{G%Q-E7U7xY#;5NM77iOq5u8x(f&MOlmf14Wv;+uoXU#T! z-8N~;^40l)-)o^4hvHP?#d{;fb3P?%&TkY7%n%t{DaiyneWNdk*F`XKWo1=hf#8cIO|i~e<{X%t!%pB&xJScnlT z4aO4gk35{Keo*MDmU@nUqO>?zQJ`3W9{ySqX|E3bl)lFYx<>$^dvd@SBEq7W2#3Op zkLAutmS7=1_p246(HTIIiLNe7!FjP8htQb}YW7dsgDnW;CL5H~Y_PC}HP0P@K^E}V zQCAncHW0ZU4>QpaiI-3b82W|Y&j}??fqFKy#4xnPmi0ooLzSE2UxjUv6` zd{$GbCAjFXBzUJ~<5m36yj<@cOKr{rza3bu@0H%5MG(;YEnn{Df#}~}(!B=?TT51jxDF_U&75S!5=KH^Yn>TxLxdU z^rIZiKt#LVM^LY#z}(J@I;wUA>BYMBJ7v4MabpKfrcqgHmSd0^+P~u7Ll`0!MQGP~ zc$z8d{}5b)&Rpvc6UygNatx~6XOD4bj}LxgRuStpYYfkoC6rmDyP)q_<4fddRsr$r zR@A2^b|q8j65Q3`*xha5(4ML#vZ21pVFW~?id5_z%c?wuB;`2;@%At{I8purb{WV) ziA-#c4Wfo&ta=KlAh!eK7UYe8TpZhjG5i|)Bk0v`J%5o99Aod+uW+klM|#gvt8M7N zGo%J{PY-tncu8E9{{IQ;>M5Ab3qW@IKY<_S9)+-PdxrwIqV~C8ln#rc(2M z1}n(;b2WH;RRz{C6hUY?W01g84%ScbNTX#Y|VqJI!Qs#<*pWacoeyR%*YM2-4;;F}Y8u;Es`u z+2lV-Vf&zDh=4kT8@qR=lo4C(fp5u7Mnmj@KIGzGger^11QxK0J)ns3DA6Q4NKXv= z&_qK2sFlMr3}qHtt}jv_qc*$5HM>;v3cw`ElRwxvwww_x-SUfAVHFL$_~kv@>rF#* zBh&XvT;g4k4;YIAk$e6bZvNMU#Ab8E0{#{atL3uEMvHW+w&_OA9J>i1=<1<|G@_2=Mx=mc=SXNlc#xza$(3ZuA9I3P1NanT&7@#skO-XjNG6>_?Ot# z+g=$CErmC+0Rw5k^a^?=i)*AisVv3W(2e9J=U~NLNoKV6$?i@5T39Xl2!F8? 
zASux6Kxy%6G=8CN@v7Ldexs-eb>BI{-LWP!wVYuU%GR6^sh8UHkk8=J5$iBGFJ7?0 z%b(k!n6*KikdXb2al~~avqBo+0l1`Dkv~h^KYZ*znU?y+9tG9ksNu-{vzt51;35ju zv;VHNh@VtVqyx;YhrD)6D{ApCIy=vDK+n94sC#*z?LW`ee_~4gOOo?tcR$INv1c~&auTU$-;#AV3iyXxzBNsgLEbXLeEZr$vF8>3*uB!46 zd6-QWx)(S7YnVrinBNEH2cS*&?w`flQ};#nJmZK32Q3hC0ZQu<1NPKQCwn!GK3!i~ zhgg&-d}{0VcTZg#T}o`#sv|2EdqFURxTKXUjx`Xqej6lfNauw8QzQC{I1}+ZTjx)N z_w=eF=;3U*&vo%+;fiel_EU-4mL`vz0H0K04aSqj4U&IIZz9%1b1VoW4Fv%pZ7A9L z!I5@70ap&)UZlqe^)K)6)=)&flJ$df$>yLnOOtu+SDiIqFV?ruk(dnR(hj<4FGRH4VGd z5*aL|UT#J8G5A&<4|)x(gnnm4;FJ9a8`6-)U+C%#k&qEzxJ`aSdQGgFAJ0hXE!=-* z%oTiHHB|l@$lAH6 zzNekDw9J{aD@S2=MLDul6+TakRPl!?CxTQOK#B5LL(8auFmYbw zO?T_&9T>GsMxtS~5F!v!U5pxeDzd|Ut;hNP?T3%tjZkWClYi15Drzt8Y=`zg1ms?% z=Mihtm>p~`65|0!9lVuJg|pr9;^$Igg7GRcr5T5{cQI7>j><8sQ{C|?2xo@*fdsA1 zF{jKn6C}{VVsOGiNJ0elQY2d{Si&f>K8@tTa#+GaRB^Ak$7mrZ9#KbXC2NCfna z`a~(#Pf7{#F-L220uK(?e({AsJTE0+fr+P99uk6OqrRBsuV0N(2faN{_|$3wwd zp&|yrMKPs)ciWirju3$nGl~ER1~HEHvtzw+w+`h;7|U}@&6PDYQK3b3NwK`dRdaNls z=1}ZOKzim^d3k@A5KLa<)cxTKCkwbe!QN`DL=2sm4;t1L2eqw@41ZWO_D(-2m0SRy zH_O`=3vs^~4chUbZX*rv!-!raUVKHW<-d`6xq&bBK2yfM7qbvvn_7%d83zU_=ysg> zlJejLlSo`m`kMfOK=2m_{Dl1s>+cVaCrHwDnILvHhF`BdoUWWZB{}Ho*RPYaQwbJf zV70M(aTRHC*H{<&P}esEX~#zB`-;S7Vf9`h%^aKWeJa74e~q@K{ks$G=nrEoDD}fX ziODl3SpZjZ3faZ2enc`cZ(naJ@4o+Yg0oTl=KHA%g=(F0%%Ylb0gPu`)l28nyOd|V zAJKeVpHuG6fE?Y`I)nan74Vryvpj)!Bs6kw6ojowpcIH7mdUeil36eW<2}Xf-Hhe$ zp*pF!Fy@T1{&TAO`^66@Iy=ax9se>8by(jE`M^nz?!N&$F_tzsZl;$ZZjyK18aHE= ze{d$(rkL?mi)yq59j>icBDi+VFe?n6xar#YK(!r7p zAgCuzjEyRsHvY>wj^%FLZIaTFg|1V(A!Df5_pM#b=!}-U<*ZVsqrZ|Er)lgZ(kV** zSb$0=)xe1P?id{0e4IRV1`^M17qaBXB$Yi~N<>83>q${%;L}D~{90c^C$|*y(1U*i z{%hM8g^xKEt6(%Mnce@$~g6rfE)Vh8dGSie>VUBZujsfIJbG zY}7*BkvfnhGIS>n#~+4H2!tiq9&Dli!wYVS-+`s?9vFYE8mT>!e2)XNw`x}yY9aUP z@~V&rKbT8jgmA7Xi~*`Zj3;_%HpJx#XGBwt0P;i}vJRk9$L<_VD=2I4``NEAQj{+{ zMs~f=X&?`#L_)cEVePzXUGu1fZN)r=@p-atQ$)$q0GKZHw$c`Q!M{{@`4iwp(aG)Y zYE<68a85oTH_VNmgrVe|;bW41d1!HySEwGr`(iot8st|v6mI~)gh4|5U}S;!3QC|W zBS)do0m<=Qm;@x)G84UDB>aKs8_y`I9F4FxjNBdY?JhkhCX3Q}~ 
zvY#IZMQISl%fDrbP)~%J5Wt5?tgKJ^2g9&Vx2(gN#uWj0uQ$)YV3%Q!Ll z9*|uwY>VY!Yd*fh^}2&`W}tHAEmuE6l7;d{LGb2m3;>qOaglx$a%Iz1*Ashmb5LFn zxFYtZ3OS##Oscb?cFhWz{g(#OpM}o)f&e6!ac+zmI`0cp;f;~Ybw%dJrt8R(d_F_G z`>bXscF&?>u%_Ipm~!=xVgu0HACnbv&WOXexGP_nT|%@-5yVY?a|#(?gCF_(Y?qJX zDUH5ZOLw+OMCwa-vj0JsQW)O@2&@0vRHLGPr3N%zy&VAhMGNe*`bYpyM?ac_FF$j# zQSl$gHmkaHhg@%0&wxVg0S^R*l;@OLU#x#%Hck_}J)u?6y)ik<=#yuRH@PdnR{SAB z{+ZEGS9gnXBi+JyO5?vg%A_e9f>ICjs0|WEco6| zDwxZOQ*F>Cs~>gBBE67tK+sJIu5;e*g999 z0Jx_ypC6jiJ$)2HHTDE*+P;Y=wq4J^fnoAI&YpxF&t}Zi2dt0D-YrzlElhA7nDPk4 z`k9U_YAKIZKeGF#jXVZKNG9hm#eb^P0;k(Avz-v!LI7TJx*K(xyKSo}+qs1QnVXIz z`D`U%6!u`8EidH6hs2KEXoPsvB8PJVJW2~2g^_w3gPA#z#{}`^9UXW;^|rxYpilZN zGh+ea5<;8+$OtDotmzxidU#TUF~k&?Ig+coghGpSlJ+!nOn8dnvlDV*O`UrG#JpL} zkE?m5aM-#-6V;&tV>c-ud_U;i*d__WFoD>0Bc?1c^DG>+0D=xL-NGu|?NNHJ?pvWQ zVC6L9lR3s=uy$U@RdU}d1tzhLT-b84Njobzd!E}Ad`mVW?9mN zap_#_T?OU7A8PMxKe?#~sn_0l%0Mu`xSO3il@%@Oy! zpe>K~Bxn@@jNv|W;DiLCxQQ`LhXB)U_CNT76)2L4rvB|gTV|_HL5e_TD*-|LEldJl z!3XflP9Qb@!3mT7|ClhVfPb2IppNpWO}Dep>@~qA*=AA$9qHq&XD+$W-Jox%m7vrB zDgL#+&m0MSQIbhS1i+7_EzAkPpHImdei}|sEetzLl`^2onWn#`q42Y=MBItu)inT= zEak;q?x|E-p2%)=Z*%h(1R1k6fj`FqE*@3TuW@)Mpkg0lvJs(d?9!%PQSi)6XM0N$t z@wD_{+>KT0)Rw-USmZoTfC*f(A59VazrhnO0h3bY0n$*Dm0Gn%{?A`DnY8t{vt)zZ z`o9%qBJq@cgc-k(5J8izSzhhA2D7R2&rw2I0B^S2&QM|}!3pAO!%RTgqLoUPcu{OH zW+J88l?H!5iiCQDNwNo=kuq&;romGk%;L9?ak`*PFd6lFftN^k*U%SVyD8%Gqt+1* z5ZSzJ9e`P=h6`3+&enHGytb&?C}f#;`S)MbE$?q9m*CthlvF+4au5-JMk-bK-d<+` zdpEz_obKEEzm9bIJr~7S0edeOoxfdO;|cq`Yy*21d%u}8@cBReRrFsI^7(tCIa1(m z__ACyktO^Jc{>NLe-4iq=Bf{TdpMlgf9&BqTJN|Sm_P6GeL7r!ruKh*`~Pgrjr_m8 zv|TLC?acq%o*M@^fCLr(c}ql|ICXLv9tw(#;D7s~1N(RX@3jAZ|J!Z*R9nHGkQ=iD z@4#Q{q$APnn3A0Wu~BG?#tlnCE{do`-}?J|_ZBib{#M-K8j-+UtEo?cYhJWV2r>Z% z(}vq%$#j@URAMJh29;dv!69-c6GN)A^v&(j3-p#&r(0vO4%1P1W2?a`{n96r&#)i9 zaUMOmO9#g@KW8S;qQ5t%J9}_ud?{-q^>ctf9m4+zfqhmx$&Mra^y5^Go+7}Qin#`@ z=)$(wBZ$gpSDAX}0PcmRzE-^g%z#W$a$Z0S-PA;bjb-HTzS*eQ<4zw6LZT(5FY+KJ z{A`0lzL)!{#XZJyFq}ZljlK6>!MQROmB7J+`s==?Tb0`6XJd(71l`|Xv)@qKddV`r 
z9`br}GJmXCAiUi(I5dPU)*HOlef`p<{WJrG16Mtr5q(%!v%YrDg;EavN0`s&UZ>;q zXDIiJeU;;yO|d^}m1xV`6UdMCKn#be9bxxj0Muz z6rY(T^t5|(xjiE-yx+*0A>v7;-~_x@;D^J_cnW!KxyqJ5X&AwsW^ z4!hUm?Tm1yuf^uqu_0f0-Ia;nGm^#Uc5~A4f+=WOG8Sh%QJDtZ^gQo1p}b1<`-_^`5cI&};1?uShM0v(`P{x; zrw}Y_gcr5%w-9621%rXMan@o636v+hH%a@>W_Bt06f6CZ-wvNLE3%ma%* za-0qAa*dWh5sx3j8CS5AounFkq!LtZzb054aT{FU?rZVH(CG8RJ`<*$66E}3ZwkRZ zW;*H+>(aD*^RJprzq?vq*e$>bmMi;h+BdY`3ah9!b`@ zQKOC0!spca{ubLW>5T4Hwj`peN@scW%jIRy4OWguM0+r+`;=IShF~^M>7%X$rN<+# zOs4%qH0`H>&=d0a`_K!yNoAs9RC^@UvV)o3ehYy`-=zUa6HzX=&Ttu4|jQNdiFK_XJ5rkz^Z%rRS{{AAWC{>z+ z>O!Lyr`G(ng0oP}ZEPW^HSEe!q7EOIxF6@5sIs6vKd&o=jkOckCM*C%P6yiLQ9++a zE1gpqsZ4Z+KwKxlDv2Bt&WjvO>dN{v6(;j&(co&Vk!UW@;5mxEN#-$(tWWo)JB@fc zO-`R_+u-2ZW3uv&GxNytXH%23XK}V&2{eA{$1HW2)^u|e|0snQ?=sSzOHt9Pq@`i9s7^22otI#SW2j@P5hqR8C#pwrc;L*uJU4@2#+5 zUL{2ta{HveaDUS1Wf?zfLumN6^h7J*a!TmkNK8K&!fjpVZQShNph9qacVg`B(!bs$ z_+Y+)`1>o8Vesl7OX2Fa*Y{YUR@@35JGC(VK~?Vgj>}0<7-hkJd`EQ!=#nn<$kAfa3Im-OM)u?`eEFp?e%t#J1E131$3!yA&O$c z1EM{W4yOYTzktsg5g5uz!#MD5{C0D-XfN-6?bQnkX9_+)>?q!U7V<|#m2r_v-$&0m zP5(uYDKyo&@{_)mzKyA!{fKJ?#zNY_t5$NK=uT<>F!d`J%dF@BHTDirmMzP=aM@Ou zZQHhO+cvsv+je!K%eKufb=kIEfA!w?ymRip_r3Aw7;DXi{37Oz$c&LQXMk&D5b@j@ zTRX?~X+jWuaY~YlSDAqh^Zq1w$Z8^U$OJ_Ng1J(2Q3T1szYmSvTBj^n@QNLp;x!UF z$ArL6w}=k(mu8tsKDKU+#nkt5W}3UyOpp7H$w=g^U5H7bQ%@(V;gM|BFv+x>Tsk=_ z>NP>=&+%xQtY9-ttgP-wf1h^OTJ3ZpX=Qmv%EEQIMZ~bm{Zs#6E)JL_@HY0ZMCcV9 zKe6Cv@4+xTd()%u-)+%X{LOm51>GqE-2J0vH#~ak4Y)%uyb`zGUIrnZ8E@p-MEQ1b4hA;deD>NMdsks z@RoCoYBHeP(9(RPI3PuGs#r>5^VH=|ElR5Oj%HEy4vxm!iXpo@;s>@F;Ph9@F9r`0DfXEOp*vh z(`H2N&lwA%vV@t@S^T0{9O0F$f^o($VcZ-IuT$<2l>~q#9+I~6+HnzGd>anu>LSec zP5K5vo5~(1so20W9nmEUHkd9|0#BJLZhLeZoHbtK8bA6Zj0>U2;^HZ@ z1~hDvz@x0>6Yii*WLI0|g$<7d(<_@Iv=c`gT@bg^Y+mq+@@XEvC1Ys^byqT zfs?NS0h^BjS%dQyoEjj3ar(SB!DYQ$v`EK$w@bfLob^(Gs*dRahpS|8gfOFl8ZiTwi4Pu+e}JL2+VeeFo* zF&*;o$NAWCuhjxW+j$uPL<~Y0EkYQ<<}t<91cPzO!!d#nV~prPw<<-TW4W_ULj6J< za;2U_c;JYqO^GqzMU%nMHw~x9bT9l>9*pf@auJmIXmUfQM(Q7hD4q{Ad04FCY!Hgn 
zMYtb+O9LbWDQ~*z7&c2F0&}Wg3B*5|tyDZ9%NM#4E;K@dy5?X2v7NvH5m{AOMi;vV z5vn18mZO}dWzfGmCUDSJp>oYQyrJSh5nTv6qCb`ugxs2CC*dbaGTC&4zt^WrwKNhz z2Y95ur~|l@Ae`|?gl$Gs399BB6U&kx8-RDf&bUb|AYO13Q3jQ3b(6IllA9r~3ve;_ zeK47O*+p3~!=V*G&O8Z)xZD--H0UZI@-S!#spK06ndk~RPMHQ$7&#Lmon>DOwoh7M z?Q+}jH}o=!ed;M~ODjPe+C;L}`Jc%Trih0U%pT;vEr9mV0zU;?8dZ*v@8=v7#%*m# zPK+&hMw9V%&~=g;(oQQ7Y&#nQC1@S)=B5fb;;y2kpS`j)Ert`b$hNQ7NxbU%s(TXe z-EApw`3QXe{RkWP5KUQ zoj?2El2WfYVX-sX9=nsgY*pdVqxUdPCHNt+i3@Ly1SsEC5z80eX>okgK)N^GDAuqY z29EU&k$5m^JyS1d-U_^LW?FFF=oWuZ?aMUBn@As~aopojzK0fWE~5S_3V(w`Od_^l z!0NF)@UvB%t=C>3Uuz@C7|CA@VY=}hrP!R*#*uBm$JS#u4X>Y^H(lHRP@JA>r->>u zGkKRIDPUuHUX@HPcM7+Au+`V>tmO&;D;=eG=(MQkY%`Wj&iAALB$fyxjDjpMH(q+o ze8G;~YHTlZ+Vr!9>-~LpZ?r!Velz#|1sT*acmU`ASh`kL_W;?Zr9|Fr8c+IyCxTc$ zg8@=KM`93D0-ucu@@-xi>1gj`{oC|~oX`Du;^W4?v@jcD8nMq96@>*gGos9m1`(%6 zg=XKtXz4JbW&>j%ZD=r3xHtEjINW-tspAulxI|@iu=?z}$zm9R+lB8%Hn%(PyA3vC zhj)gAgwfMY>~wGYuh`A#uQKw`q(QmfGCSGdUq;WK#uGz_={CKrTqP^Saeio(e!i@Z z&aT!<#&ZH_?W`!{`Ev?tbmS%xgr;_gXQcbsJKt>Mq2%GBBK;87EsJk(d)TW^blV6K zPVf4BRF^M;_vX+H;1$-QCRBu2^d|u@i{%4alD_1YEkm|YPQ3}k1PArQB%1@XwUR(l z%wLXn9|v?II*W{hC>AAJtO_@Qu)USc$ZykFKx_qu_D0HAUU!hcUg2W`e7Kg%5Lh@p zsvl9wP>%xWl-_%yql?8GA|!OtUl$}~d_S7Hn~o)Y^kr*xb9?FiR$DoO3qTz&Q#FS# zUC}U7*2e5aY?1@2RfM{Bn6ibhvfltp*xnO}^!u1Ew#1D$_G$+e5)J*PugJJb2A3zY z`kMsE=^GL%nVZk`&T*o;a8Vp-v3$81LD4|{Qw*Pwr8tWeH`XoQW{wGL&r2SNCnels zlsKb(UQzmdR}2f$4J;ZE$z)MM_;EqxcOttu=^;8)SS!+AP78u0cDA3$Ga9$=1U(69 zL>Blz5oQ8!V0(6%62KJSdovK9A>o51G9>Q7twi^zxqlN_h#JaHsNPXS@bSiRGNXR| zOdSw1U-e@@m4(Q0F|H(UAgiTPpb7Fo+=*-fdogNcv5k4;$3cy$aMPkg-0u}|nDc&G z?t(}>k*MK^p6V{1qnPz8(>Eh6li|%lIXI!UV--90=63|PS=Hayr$@(5b2h8s+oweC z>e#%=4qFWixwdw!lT|iST2?A-T2>*pOG^_k2aB5PN7lAB8m1~0u0CM*FrC{kCjyh~ z_bI3Ozo6gZ`5>zAw(?zl>Fqrqjvuu%QJFk{u`oNj?DeStvZMrb_f9pcTtNHBj%Wie z@A+B_o1k?2d+m1r_XV`sPcg3Gfd~X7^>DeP z+7$@?kMN_=Cv#~gVo@u>QF87tg`{ftj=hBki z{rko2FMqmR?vJyLz)N5I>dV<(x}3M2@!{^bvyJZ0x7$l^$7(zMtL|Z<#&vzHJdr+^!Dm zeH=8$e)#*o9QEU^oqjrRjep32zlvB0uFdmx@-2TJd%uXhb0$KDxSi=;E7kl$_yzk2 
z+@11mi6Xw@#jD`jVwORVz~^-h<+%`nrT?OzsSC0{+|# z6V0x3+h!snG5BD?)N5xfh^5wR?14O!NZ_G`g>CAR1O^3K9N8%+%P`DzU(|TXCzsOa z?LF2fSJOuI;Jr1APX|qlXroWIUeU^iCSoq~;6B7n75TTfm#|O&`ve^v#j<6gkIfpD zs>n-2vgC7no;VJOrb|$qEVX2p&U z#N!M-^+z_Sk8x<2kcEiO06Mn5p64WU>WGPQ^QgOLWG#W?G7Y=1-)$QJpADpp&=;^X zR(mq8F)x!Yw2T+x^$Bg>t9M733b`GC1$Rp@TK1}F_nP!d--=m=ShfpQS*}Mx^o_E* zjU&Rjq1iMD^u2y(@*=2x8&FCZm>;GUtRJ3BntudiDnB1kH2sFkwI3STX+Ri!=Xg5U zP$uIZ9KzO&0+Z|alPD20_A?GM0&D7HLXg%DL7kmEyJ4twV5W{Z_b?8-cl&3z&i->} zp~e}fInq2xz+K9t2k(0%&m>K(4LMAcLD<>#z92Xb-)O651v8*8J zT?=~9(qiSv0&cY5=Gp||1%B|rSOhHNxZ<2BIS@dKT@Bs-dB z8r;Tn!^m_%VMemWp!>vCak;vV0YK+jOxmO&oNDcZ{fk2`9~0UZ7ugES5qB6%9SiAi z13t{B`+hq8+};^X_q#i{*o*sSvO3*MyGfReWFqj7d*BSZOw02ahJ#MY6|F3|x_){D zelQr=T|NngrzH!zWTfLLzJz0WChu-J)QQ#_vMoVhXEt|+!^@bL1>jls;j@A73p92H zy%pB<#LaeXEI&u!BhSFJ7^w|DWwu> zj4Ru$cT>>PsQPxg^NjHOs{nv*P*ZlmEzwn^C?w3x;uv~m_lSC2&|zvp7nP|a|JQts z_W9MO(f2svrH-upm3wzI=_Z;7`!j;C7xbb2s7HF|fYQBvUh^k`y=;dg0eh@?|B<>S zxa|_;Li*wkMUgm}yWA_E_lw<=(U$Lv&*K%GZx;x95}dR{YCzFZ@S}Pa^Luk?9c zHOPi3XqIG6k9nv1iCc2NJ`NOYnsMip_vWrt5_Mdf2ftAt4!nf)?*wu}VbVTI^KGdx zrH9oPG;>P1Qr^WmM@yXd50>lJU5`QkieRlvKncowE;1E zl`ywdg$~L#&l2L>DsG_39vh)afs=UZ$EZuTX=PuoReV!rSY2Ui`|e(mBQS}aY(rLUv^7G(=9bJ5yiKSDl$xj$QLjdY9-TGBt3u9tax z!(?cQ-R4)o@`DFrinj@Av!}WNJOyZPQDHgP!Q94*{Q-ZgS#h-8Avi+#UAxXZs6T{`P2*hzt}sk>IyKs304 zMMNQgw^AH00E*nWMh$=yA;BR8axJBq!!{Q-kesDsAroSg)Z+EB=LW)GU#rp0RIHx& z+%CKBE-5|~j8!U1EpQT>=p=R$D*UU8%_N)*oF)L`FSnsWBM=1bin$psQsSFSVgwl8 z(SQ~5{#CAmohRggQs3{y&QkO;bjuAR`WLEhRb`np!Ij=j!4$4YC#lJvagz0^)T&MD z%3M$&lZkmYYa};`~k^0MVH=@*UUHcy+#`Q zptgzo9;>6&q5XJ>x=#=VJz8_q)BQb!5QAtPp;liv9WBbYENsSN_^vtUo0ile9B``# zi$LNk{{5o%?2LsuziQP2jl!5mI!18BeK|X>X)W-=ZD6<0OuJuT+K;CNCA4&D#%#$P z*cbAwgKswAEi4JqTV6GtLT~D@L#CaPkJfWG0(AZT5vBscDnhAYZylQ9Fuy&9cdtrpX70hg-#`l|-+$(e2qQ#_BzD@N zPT7vl>W)bckr&)N=^@)A)wsd) z3o;6Ru6xuyQs0TYfQ&TMQtw#zT+7nWyV}ACMWbqv`NKS%b}E8o@g{jD*D05Xx-n(G zGem-OfD98(5S*oJHJZ!PhJ$T%X^kOV^(D&?Mmp^*!##k0`RYpxk21W``^h-$9fRph z(3?-#9M1zDtW2;+6*nRJ)VOkr8Zg;)8+Bqsp%+v3l~bC85DH=-*(e)7_*^1P2rNEX 
zA~R?VYB5+4dhHhNOsII+y=U663+Zl0FrJ6ZIl5yvTTxXR(tvb<0txhHwP-tGAZ?c% zv!SqVj114lafuOp60UhZcuNmK$Ykq6e=`BlKzO8;6(D4%Mro^`@SH}$qmDFfJdu%Z zL|e*SQX26BFLpJYo$+@@@T_t>s zVGBzwXkr>O6y8vg$j3|T7|e%J0&VvfotVk4LQsQsaKfmijF*7JD?3hD>-!0``tEaH z90~=t>KE*d#4E2<`#IqNE~L>;giGH$`c=W9P9@6{7LTjs7yP{Qv6>yi4}Ol(b~oMo z3>M~&(^^>r*W$wus?*wI@3d;oSQkryfgkJC_>04xlQ^L{vwBBt(wnhNGLk|eu7N+~ zh!&5>m#P{mT52AEQ5M*bM28nJeoOO|)#ul>NEJOFCWEEoot9fGLF8F4SLd2LH;^Q{ zX&|+NV!>21Y@4FL_ZXw67?#w2Ym6ggnh&V~eW*5$wdHd%!2jNJ3s!8?-7?;=CY;wzovi%*BLXG4{b4G0cXo96gvqGBqn5mBK- z022QMb-mW-@CZ`>fq{~6#uiWhTp-C*_{nc3d?l=pQ|&%Ksz#iJVK2?$or&nhopyQL zeGsyiiOx0z!mW(sKy3Bd)P^hVw}3T$T9@^rT>ccH{mA9J_UJUW(PKgE_`uO+_#sIQ zDsvK#Snc+`wv|;BMWI^=4bfOT?!%NrxuF_%g*6Yx(`H@7xi^#DJ^?y|(xR9F=Ymls zSCThH9R%cgWOAFKp++bnBh?GKU{KFZzp`1-W8Sy@W0ZRmMQxS}eFn=H39m{ik8Kyr zx2A~tGl{Rdfq?JPT4=^egc%Ftt;S;xWZSdlJh|;5(#+BZ_sO%IuLx! zI4z$j6Sgy);kKA`7{FUl)xK4xt{QN&RDTm`BB-OQ_3A)Az+DE;r%#XVry!QP(SOc! z=9TSSh-Fs-|6QS*2$o-V0V;+O3VIhCud#>!P+f^jkk9wL5sLTyi6H#=}YmkFpN^kw0o%p`f-VL-I^PL!`)v{1Hl1>a@vf0-P8LVCgZB;y!AVznJ4 zuu-LX0_vbq3Q}0Q2fARiWMsqpO9(cR-Ip*;T84;nrd>V82!G7^2+6IiV2WJVa5{$E z#TL1CKdh~mJwaP=-^f}{=+cud7*c|(!v|rOPb(Y^Rut@};By}y3;HZBfl@95WmW|$ z`uWF(&+yhj%B)C4td(HV(`@?{JV9W`&T}vPjLr^NB!t%08LFNHiwVJfCrGx`os6q$ zW&-TrdJIPS82-UwgS}%;1jkPJ9E5_A%2gH^s@CpK!`=>gLYKL8$__0+tLnmNt}Swq zI8E5?ikZJ`?oi+J5~N;QuRC8o{nAJXGnU7F(5-n3A3sfj6s3Y~l?-AY2Oyo7mYu>* z;z?-DE8asIiI-?&sDYY=oa0NZ0M_E15CnH`eUPUfI9D>l4Xj-?N#$EynVm(kxfDY^oj}?4>XnS}VXG-!T1=G^?>fnE+ zxD=CfJ#5FG%7=dc4b91E)DAp!4EGLZ3Y_2GJ2e}QbreBd(+B|;f8~L=0VL890xK#O zwY8xfBVS;3X&6V-7Axcljiry8`biSbTSlbC7nz`)C_lj+<{YL6O>T=erpOQnu+jxL zwUmeOEF`{>r!d@0X>)=|;K(a-B4fuCh^%q7mPeDu->58rQ}fZ@N&C_BksQFH1RWDr zh`#9>YFpJu8G~`%r2ndexJrz7cupp9KG%8T|410z_RX3-Zt}D{aCbq67DepNd=WM6 z;nZ5J&e90f*9?Dy>2i8LG1~{Py6w{Sc9cuo(H`w{j11dQM4lYQuuP7OBQAfhocy?0 zaNj`K_c(+(Px+%n;+VO8jfHMD_S`b{XSY%R^h-zcS|N^0w!_yZ-uw#AcpM$kso)G) zZ}nISyl6U2%j1;2^C(Ma0-`vO(T`3|m1?+e86^s()H6jUX4DEdtnEiOaD_jjnmGfq zNqHgauLj=Ama(PyJd-)mLM-NiDh^z{Z{Fca- 
z=$INHI3(e&gqYfTTd5NcT3Mayq8tWn^4N=-Aup|ZQYESng#YbyD$OVHT_&$_!_FH% z7F7vC-j=7`w1nsV!W(*lH6mC z1Co?7`Bmc5=?-C=4#(S2Ey9ur0Z_|(TSTd!8BH)C#$HHWvv6}}B@%>DgfCKKSfGxWs5a=zj#u>ON#Ag&^!f^E4>5=^paIsEAuK}-fBU1wzd?-w+xn z`Md}0Wt6ccdr5B22bW8l3+woy^~Op9mQ#8hLTwV#(aaTe9)WMiL<*Ib$qTHb!tbqa z3};uDUm4MyI&K|Saj~QM**Qa+DaGCORM25@P$Ntl5C!4N8d*SuVu4oIntNcp`mf1x zd!+^eTTRbdj zp`0Os7`~E-JfGb(Zxkp2_d3sWx$yUq^oEzgIDse=(L&Nl9LlxL2XxNCNh6x#zu>HJ z5C9gyM1Y8xpxXe6G>*`Z!4@zzcNRj1srlM5rg4kz(+8L7uA;H4WX(|U6*b!ZIcmMIDf)Tg)aIYM@tCTjN)a>|xl$YBl& zY{;ieCg@f50&f+HC_^>2ZAb_NzJD5j_T(;ipD?DgA7*0$(47-8*^fWk=uv1s2@Lu; zGzvKqlv5Wm31T?{&$}$kYXFD80R+#;!-eJna#ql)DBMF?77KNxdKIbH*VroODut{S ztAM|@dIo!)6vAbniYo&F87aV{Q5F|V0wr0GY1%iuK#7`(8~{N=;41GPeLY{e&HdOj z<5Oy9Y9bgSq!D)MgneYq+v%*S10!J+@aW~5@E8ov`wlmpwPbaP5jYUZ%-6K}ONFeQ zo4#IIKY_`4%tfhKMX|R5@+Pbw8#QPo!TXHf6$CNH)Fsn$yztz3b-$=CEbRoIGSs<` ztn+p&bgGS>6A2(H_8_p60EMZ&?L57~uFCQ)kbs$y4kFH0WlwLN9 zy!;0j6%SBsiD;gSaWQL!3=C`9ysd0cgV7;8Ob(kyLzZg#OhcyTf$&H-DaU!{Kn(u+ z)9mI>E=H-LVVtrzoAUzz9WhB9x755$y}QZ=pB_@kSKppgRM2^|AnayK_6UQa*|(#x3kCmx z_;~Vj@jAwX%#h`05r1ATHE9@|SJ~JKECc!|X`YVKm=Wr+oXh14rWc?jRLQ5Aj{0BHkXZqKPiXvu`U!$U$KgVOnP z--LKUC+^M4k2@^RE|if`rLvg=+I=XJ?`@%HnGqtGjaf|XEC5rbi;sDM0|2_xI-Ey2 zg{voJ4fE_USbU6bk5^^qb@+o%FRMOGN&SgsK5<4OYDVT!5IYNjOGPg_2eX21+o6L% zi3 zsg7pzS=aD`p`DNfT#;CoNn%xeE8b?w5Ma|}7`WqCnnFBdxQ=(3ruZLKA=ZKcGbgjOWqnK)vx^Y@MhT`gY((1JWb0=>m(NE4jr zivXGJV=|XQ0$yzGh1{+cPpkVi5JJ>5tx@G!IDaM^VcGJSG}Bv>Pk;spw_IjjPT%*8(CfXL)3C1=OS?0LYRhR{qg?>h+>b=Qo=7?M(KkM)gT%0WXAp#4KyfNm8 z{<)xNJktX1sa0u7A@h~Y2QhuNn$IYSoi9eK2^MPzsvWj-$ogyB2YYHVhMUl(ZJk#r z9|aXj#>BKN*eb;y-iTM8nz6rm_|200&P5UdA(RR zLcF-ax|2Jkcg#f=j&bjo@;qOh)+$Zm`bs<@iw6}~?iBJqm7OP9Q&bqOprPjhrdoRZUS9p3^H|?oOBEL9V3M!)ZW4l;PrDWLW;1Ge0@STs=!-* z6u-daIotqd(*S*+P)G2~h-4wkytXxzVJBPD;_EbukH-WW{I1vLa9d057B|lU=l)x< zG<=YRg4VZFJDR$ZSFWJR zloy@rQS#d1@>27yt?54d_5i&@W-~PUomoGzIGb-kYfRloW^~9v zViVc*XYRRFGwiIrQB&|*K(v|2EIUttmDG3v4>@>@@66Q9%X}-KKkc$h7(!hT92$N2ZU2b?Eu_HyJ?RH`qo-Ep 
z0M5KQHpcjS2YWo7S1Uoc>j;W0Y5zF%48UH8rnkX*sTF;yLo^d@y)CP1=NW5DfAwgE zFNq#McP|{t*eEsy{qNV0tr!?#@;pkEBFP5$8Y((%aa3k)q34H9tf=Orx6w{fPT!I4k1}C z9tnPsG;k(jMN~(i=0YlAfK(DLvgHlx2Z#4#SUNzV(#e(2=w)>px*ZU3bQT}N{M?>T!rY)_AvHo@!yCT} zRYh-xF~}%!<}82F&Y^5I(81%AbiV~PiaZyj{c-p}fA-1+)Vq+V?*?&xGMmgbl*7)k zA7(nDw=S}Cb?F44rfY&3ds*w@OEde+-;=DK>rsUY*kQYff zwEapP(OsD8b5UT5d8ijdbLTNd>Ta8$Y1wlaylQ(LKe>-US&_5CM21abTyE^J%j^KN zmGGsg3QV>ZMqw&G0K=*=E9( zlEg?pNO#auHMI5FC-n%slH$Ej!D<^Oo(|+3bsQjOf}})@{1;a%)Qw2QS@C0FWF@+n z%%f)10(y)(ggayLOfe$AnyxBoeu{-g*we@rr`gZWvj9G4@KQI-;C03;D2Pl6U5Kq*O_qd1{3N*f{$hQ@1B7B?og(&3yA9#S%sxL z5c9FRk3b($TI7)l=Z$i4m=HHF8PN=BhXoLNU zsR67ZBQUEGd7*A`_b0$kM~YTjs}8k$FBXk$-tZeF zltuGH?ITm1ouhwr3+=dJYAl+AMu_!-H*bH+0MoGVZ2^_p*Qi9JzIZJU{eZzXgXOjjbtKZzT8~`kKpr86 zrtGky(?zazz5G6%qpHTypj#<42l|XHC4`&&Lxj&zhTIlT9%+EWHhJ;jsf2*)+=6*- zWxK}wkyNQ4aJB>zg$vitl)e>!Ll!5kl$2e{2Feyd;`sFq=EkRX87N3worG!8tTNU^ zI~WnoJHc^OOuFS$u{tGy)+(`AKBfVZ{n9+8oq-BnC>W@ST|AU7(@|Fz6cbP)e@aJB zSfW+-BUwkGQ7far;oH9~ zZJ=k^R?-|r!0)(y;KpPPM*Ce1*TgER)?+rt2=DLgl#Cnj7_%SWKt=VjfliI(Yb7~?`(Z?IaLo#4}ls%))T1*`EjJ*s;srtY-P6)EC%VC(r zou!&-1$K8#fmas?-W%=|1F0_0%uL%}?4-$Z3IMh5$Sk6EDca_7mmGGG3}W=~^%I!O z-=%?n)<`us&Qa(oHK#DX`MXunV=Q*(dX-2358y8HpuHbb$BbG zNiBO`NfPIKur*{^efc>+L7TUpYFqu#NsAU&ztUJ23W97D`I~YNR<86u>6HjjRXoJ2 zxskr4jvWxUF_8{=&y~5&=My`t=yO&0X20P5&{0K_g0d+QXS6B~aFIzx# zI*2(arVZ;iq+gWbqYi?Q&s0IvFWjWZ@?L z_P3k$Tej(TdI;7b%$d=3Ix(0@d9Xjs8Z2ML2DeB2HJH zkczJEpY5rwOGyYLiBcmRsa?}78ILXV2vd3( ztm0}jY>B_!!x{v zLI)s)D65hIPFEL|R74#^bZWd192@!K8kLuo(CqIuQteWvx&3)pm9BxVL>6ll&|sN0 z`ve=L!a(Bc=}(!>VuB=PM!FhXZhZ}gX8s)n$r|t zgQel^aF(B(DNuCi;sizldFbPs1A8AQA4NF-jFK?~Rk}xbo}LGy*LX=w zcvgn9qwf~DqwWW~FgTtfoZ*pt+fOG!v!w3Ma(k^sG%&)q=7|v%7?G0dzPJCD`PfhU zIKi{QCLe>1KS)qLA%Z8?m@bg46j8|Pls>Y=D2(^);v`xrr({v9{lkLX0cz8uu$d=f zRN(7j0qPi5Z!gOPT?XzGs7tr7Ep9Nr@}M$JPn(+*f3(>5uRDqU7Zx2{EJ4Y)+J3FgCL1fVhGIIU&Fd>)XZ=ndTCN__N zS_`g*E2ORh$hl%Dm~PQ2q$&xs*Ov}$45S18iAG{RZ(Xtpqdyb&fTK_~iHuEZhz*+!$Y`x>vX<69oO}5YBmJZRihP zzo#JeWVeRd`tw2N=6BeIV^8H-@i4fd|}4E40M%I^Pv%7(=-z 
z5-nzrmF4+!xmZL>Irxl-0$Dq4or+QA&ygFBw>?8;B4ws_LN z>M1i(?J37+yI9AeMg@K^T<%7=7Xbs~n09pG9lvyw46Up(rGXXE4o*{wSlid`cA!ol z)|SPKAjzA2=KuBUu&+0MR;yuLoHK}|;PjK2ad==a#eTA9tcBl#M6&Z*ikxk25RWaF#rt(Wugq?ShX^2QwGFtjhM^I%c*F;`Tl}jZ_-pHVa-a7_O2bWTIt`_&7%0iHBGhM!?YarNadD zLOrxmLxyE_ol4T~s-7k@uBo~DIJ^@)IU}`T1~Xtv(_-fFZzdcl;;p4PzoBbvChe)>HPdH*wPWV@9fy#$|yRP(mI_yZVc?n3U`d zq73x6O@0vX0M9@4FyE!*1YAAjQ0M~*_(x?*aGpPZxw?14h*DWakOcnB@Pf!<+ep@s>}#Sh)N-hSL$*; z@l&*gL{&LpSw8Y(GY-5i401hi>uI`(E(q5YU_fK&gpy)yxpBny=9djA?^cOfzWenf zg&z}I3Q>8csggh0pO}CYmM`biSPF03@rkyu>BVpdD6u(}2_7>=<3Rdp%-&iU2;tMlLd$o>;%U~m6tB>od!o(zF2^MyA4LL>gg z=wD|W{O8wyF*UMtH2L4)@FO8QWM3WOzTkhx{-5?Yb3ow#gu6I7+u8h^!2d4cFT{Vf zKi8oF@|V5&|1TxI?*A}Q|GdtBTCzWk{yx!=|AhSyyY^2y|BRUVHyzXXzu5ne_?drF z_-D}izbTM}{7vEigs}hrzxEs7zo7q0nDXC8$Ujr2{F@_&-+#dVoj~QEr~J=F(!U9q zcl<@*KgIU{I%&8+$iLbj4SW8r;Xhy}a}yKi|2ejEAUE64yZ`_)P=Eo@{|ouI)=K^d e_Ae5D<$sZv0{yc-_!`fM0N`K!g!=w``~LuC&45S% literal 0 HcmV?d00001 diff --git a/data/labels.csv b/data/labels.csv new file mode 100644 index 0000000..71fe314 --- /dev/null +++ b/data/labels.csv @@ -0,0 +1,84 @@ +A/主题活动 +A/代表大会 +A/传达学习 +A/党课党会 +A/十九大 +A/学习研讨 +A/工作会议 +A/年度民主生活会 +A/廉政教育组织党员干部参观 +A/开展主题党日活动 +A/报告会 +A/理论学习 +A/组织党员干部参观 +A/组织党员干部观看红色电影 +A/组织学习 +A/组织生活会 +A/贯彻学习 +B/会员大会 +B/全委扩大会议 +B/听取工作汇报 +B/常委会 +B/换届 +B/提升科协基层组织工作 +B/科普人才队伍建设 +B/群团工作会议 +B/选举 +C/参与城区精神文明共建 +C/展览献爱心 +C/志愿 +C/文明单位考评 +C/文明实践科技与科普服务平台 +C/新时代文明实践科技与科普服务平台 +C/道德讲堂 +D/优秀人才评选 +D/先进 +D/征集 +D/授予荣誉称号 +D/示范 +D/表彰 +D/评审 +D/评选 +E/乡镇街道三长制 +E/会员日活动 +E/全国科技工作者日 +E/征求意见和建议 +E/成立科协组织 +E/最美科技工作者 +E/竞赛 +E/高层次人才联系服务 +F/创新驱动助力工程 +F/创新驱动助力工程知识与技能普及培训 +F/培训知识与技能普及培训 +F/学会创新驱动服务站 +F/对接会 +F/对接项目 +F/工作站 +F/推广 +F/服务站 +F/科技创新 +F/科技节 +F/论坛 +F/调研指导 +G/专家乡村学堂讲科普 +G/全民科学素质工作领导小组会议 +G/知识与技能普及培训 +G/知识宣传 +G/知识讲座 +G/科学普及 +G/科技活动周 +G/科技辅导员培训 +G/科技馆 +G/科普基地建设 +G/科普志愿活动 +H/帮扶 +H/座谈会 +H/慰问 +H/扶贫 +H/灾后农业生产自救 +H/知识宣传 +H/精准扶贫 +H/组织专家义诊 +H/脱贫 +H/调查 +H/走访调研 diff --git a/data/train.csv b/data/train.csv new file mode 100644 index 0000000..5e57500 --- /dev/null +++ b/data/train.csv @@ 
-0,0 +1,343 @@ +label|,|ques +A/代表大会|,|肥西县柿树岗乡科协第三次代表大会召开 +A/代表大会|,|肥西县桃花镇召开科协第三次代表大会 +A/主题活动|,|肥西县三河镇举办65”世界环境日主题宣传活动 +A/开展主题党日活动|,|省科协组织机关退离休干部开展两岸情缘”主题党日活动 +A/传达学习,A/十九大|,|各党支部传达学习党的十九大精神进展情况之九 +A/工作会议|,|肥东县召开2019年全县科普工作会议 +A/主题活动|,|省医学保健养生研究会党委组织开展不忘初心 牢记使命”主题系列活动 +A/开展主题党日活动|,|肥东县科协开展不忘初心牢记使命”七一主题党日活动 +A/工作会议|,|肥东县召开2018年全县科普工作会议 +A/贯彻学习|,|肥东县科协贯彻落实全县党建工作会议精神 +A/十九大,A/贯彻学习|,|肥东县科协多措并举学习贯彻党的十九大精神 +A/廉政教育组织党员干部参观|,|肥东县科协组织党员干部参观省党风廉政教育展 +A/组织生活会|,|肥东县科协召开讲重做”专题警示教育民主生活会暨组织生活会 +A/工作会议|,|肥东县科协工作会议召开 +A/主题活动|,|王洵赴合肥开展不忘初心牢记使命”主题教育专题调研活动 +A/传达学习,A/十九大|,|各党支部传达学习党的十九大精神进展情况之四 +A/组织学习|,|机关第三党支部组织开展革命传统教育重温入党誓词 +A/学习研讨|,|2016化石保护研讨会在合肥召开 +A/开展主题党日活动|,|庐江县科协党支部开展科普进社区主题党日活动 +A/代表大会|,|庐江县科学技术协会召开第五次代表大会 +A/报告会,A/理论学习|,|庐江县经济发展与干部学习论坛科普报告会举办 +A/报告会|,|庐江县举办科普报告会 +A/主题活动|,|长丰县科技馆开展清明节环保科普主题系列活动 +A/开展主题党日活动|,|缅怀先烈 牢记使命 长丰县科协赴焦裕禄纪念馆红旗渠开展主题党日活动 +A/主题活动|,|长丰县科协参加宣传贯彻志愿服务条例主题实践暨圆梦微心愿”活动 +A/十九大|,|合肥市科协开展纪念建党96周年和喜迎十九大”系列活动 +A/主题活动|,|省科协食品安全主题日活动走进长丰县岗集镇 +A/党课党会|,|王海彦同志为长丰县岗集镇龙岗社区党员讲党课 +A/主题活动|,|长丰县岗集镇举办食品安全宣传主题演讲比赛 +A/工作会议|,|长丰县岗集镇科协工作会议召开 +A/主题活动|,|合肥市庐阳区防震减灾”主题科普嘉年华活动举办 +A/工作会议|,|合肥市蜀山区科协召开2017年科协工作会议 +A/工作会议|,|合肥市蜀山区科协召开社区科普大学教学工作会议 +A/代表大会|,|合肥市包河区科协第二次代表大会召开 +A/报告会|,|怀远县科协举办农村生态环境与健康生活科普报告会 +A/工作会议|,|固镇县召开2019年全民科学素质工作会议 +A/代表大会|,|五河县科协召开第七次代表大会 +A/工作会议|,|蚌埠市龙子湖区科普工作会议召开 +A/代表大会|,|蚌埠市蚌山区科学技术协会第二次代表大会召开 +A/代表大会|,|桐城市科协第四次代表大会召开 +A/代表大会|,|桐城市科学技术协会召开第四次代表大会 +A/十九大,A/贯彻学习|,|桐城市科协围绕提升三性” 学习贯彻党的十九大精神 +A/十九大,A/贯彻学习|,|桐城市科协召开学习贯彻党的十九大精神专题会 +A/十九大|,|桐城市科协组织收看党的十九大开幕式 +A/组织党员干部观看红色电影|,|桐城市科协组织机关全体党员干部观看榜样 +A/传达学习|,|桐城市科协传达学习省科协九届六次全委会议精神 +A/主题活动|,|宿松县开展低碳生活绿色出行”主题宣传活动 +A/代表大会|,|宿松县科协第三次代表大会召开 +A/年度民主生活会|,|怀宁县科协党组召开讲重作”专题警示教育民主生活会 +A/组织党员干部参观|,|怀宁县科协组织党员科技工作者服务团开展听民声送技术活动 +B/会员大会|,|庐江县举办茶叶协会二届二次会员大会暨庐江县茶叶公共品牌培训会 +B/全委扩大会议|,|庐江县科协四届六次全委会议召开 +B/全委扩大会议|,|长丰县科协召开三届二次全委会议 +B/全委扩大会议|,|怀远县科协召开五届三次全委会议 +B/全委扩大会议|,|五河县科协七届三次全委会议召开 +B/全委扩大会议|,|桐城市科协召开四届二次全委会暨全民科学素质工作会议 +B/换届,B/选举|,|桐城市养猪协会召开第三届换届选举大会 +B/全委扩大会议|,|桐城市科协召开三届六次全委会议 +B/全委扩大会议|,|安庆市大观区科协三届二次全委会议召开 
+B/全委扩大会议|,|安庆市迎江区科协学习贯彻省科协九届八次全委会等会议精神 +B/提升科协基层组织工作|,|铜陵市义安区全面完成科协基层组织31”吸纳工作 +B/全委扩大会议|,|铜陵市郊区科协召开四届二次全委会 +B/会员大会|,|黄山市休宁县老科协第二次会员大会召开 +B/常委会,B/听取工作汇报|,|中共休宁县委常委会听取科协工作汇报 +B/全委扩大会议|,|休宁县科协召开五届六次全委扩大会议 +B/换届|,|黄山市祁门县抓好科协组织换届工作三长”进入基层科协履职 +B/全委扩大会议|,|祁门县科协六届五次全委会议召开 +B/全委扩大会议|,|祁门县科协召开六届四次全委会议 +B/全委扩大会议|,|黟县科协七届二次全委扩大会议暨2019年县全民科学素质工作会议召开 +B/全委扩大会议|,|黟县科协学习贯彻省科协九届九次全委会议精神 +B/全委扩大会议|,|黄山市黄山区科协五届五次全委扩大会议暨2019年全区乡镇科协工作会议召开 +B/听取工作汇报|,|黄山区召开群团改革工作汇报会 +B/全委扩大会议|,|黄山市黄山区科协学习贯彻省科协九届九次全委会议精神 +B/全委扩大会议|,|黄山市徽州区科协四届六次全委会议召开 +B/全委扩大会议|,|黄山市徽州区科协四届五次全委会议召开 +B/科普人才队伍建设|,|黄山市徽州区科协四措并举”抓人才队伍建设 +B/全委扩大会议|,|黄山市徽州区科协四届四次全委会议召开 +B/换届|,|黄山市徽州区圆满完成乡镇科协换届工作 +B/全委扩大会议|,|石台县科协四届二次全委会议召开 +B/全委扩大会议|,|广德县科协学习贯彻省科协九届六次全委会议精神 +B/全委扩大会议|,|绩溪县科协召开五届二次全委会议 +B/全委扩大会议|,|霍邱县科协八届三次全委会议召开 +B/提升科协基层组织工作|,|六安市裕安区召开提升科协基层组织力31”工作推进会 +B/常委会,B/听取工作汇报|,|宿州市埇桥区委常委会专题听取区科协工作汇报 +B/换届|,|宿松县召开科协系统深化改革和县乡科协换届工作部署会 +B/全委扩大会议|,|濉溪县科协召开四届九次全委扩大会议 +B/群团工作会议|,|全椒县科协认真贯彻落实全市群团工作者培训班会议精神 +B/全委扩大会议|,|来安县科协召开四届二次全委扩大会议 +B/群团工作会议|,|定远县科协学习贯彻省委群团工作会议精神 +C/文明实践科技与科普服务平台|,|安徽省科协 安徽省文明办关于开展新时代文明实践中心科技志愿服务工作的通知 +C/新时代文明实践科技与科普服务平台,C/志愿|,|宿松县新时代文明实践中心科技志愿服务队开展水源垃圾清理活动 +C/展览献爱心|,|太湖县中学生参加 2017年参观科技展览有奖征文暨科技夏令营”获佳绩 +C/展览献爱心|,|黄山市屯溪区党建引领科普爱心漂流书屋”活动启动 +C/参与城区精神文明共建|,|黄山市徽州区科协助力全国文明城市创建工作 +C/文明实践科技与科普服务平台|,|广德县科协召开文明实践科技与科普服务平台推进会 +C/参与城区精神文明共建|,|广德县科协赴联点共建社区开展文明创建工作 +C/文明实践科技与科普服务平台|,|金寨县科协开展新时代文明实践科学传播活动 +C/文明单位考评|,|六安市金寨县科协积极开展文明单位志愿服务进小区活动 +C/道德讲堂|,|来安县科协开展道德讲堂活动 +C/参与城区精神文明共建|,|定远县科协发挥科协优势助推社区文明创建 +D/表彰|,|肥西县科协召开科普工作表彰暨全县科协工作座谈会 +D/示范|,|关于20182022年度安徽省示范农村专业技术协会认定名单的通知 +D/示范|,|20182022年度安徽省示范农村专业技术协会拟认定名单公示 +D/优秀人才评选|,|肥西县第三届完成青少年科技创新县长奖及优秀科技辅导员评选 +D/授予荣誉称号|,|合肥市科协荣获2017年全国科普日活动优秀组织单位 +D/表彰,D/优秀人才评选|,|合肥市预防医学会开展结核病防治志愿者优秀个人与集体评选表彰活动 +D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 +D/先进|,|坚定信仰 乐于奉献──记省科协离退休老干部支部书记贾轩伟先进事迹 +D/授予荣誉称号|,|安徽省科协获得全国农民科学素质网络知识竞答省级优秀组织单位”称号 +D/示范|,|关于20172021年度安徽省科普示范社区认定名单的通知 +D/示范|,|肥东县发挥基层科普行动计划”项目示范带动作用 +D/示范|,|我省18个县市区被命名为首批20162020年度全国科普示范县市区 +D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 
+D/示范|,|关于20172021年度安徽省示范农村专业技术协会认定名单的通知 +D/表彰|,|庐江县科协表彰2016年优秀科技工作者 +D/表彰|,|我省20家单位2015年全国科普日”活动受到中国科协表彰 +D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 +D/授予荣誉称号|,|长丰县科协科普宣传惠民生项目荣获2018年度长丰县优秀志愿服务项目”称号 +D/表彰|,|长丰县召开第二届青少年科技创新县长奖表彰大会 +D/表彰|,|长丰县举办首届青少年科技创新县长奖表彰会 +D/表彰|,|合肥市瑶海区举行青少年科技创新区长奖表彰大会 +D/示范|,|合肥市庐阳区科普示范创建工作喜获丰收 +D/征集|,|关于转发中国科协改革工作办公室关于开展礼赞新中国追梦新时代”改革创新案例征集活动的通知的通知 +D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 +D/表彰|,|怀远县科协系统表彰会召开 +D/示范|,|我省18个县市区被命名为首批20162020年度全国科普示范县市区 +D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 +D/先进,D/授予荣誉称号|,|怀宁县科协纪检组获纪检监察工作先进集体”称号 +D/表彰|,|歙县第二届青少年科技创新奖表彰会召开 +D/优秀人才评选|,|黄山区全面启动首届百名优秀人才评选活动 +D/优秀人才评选,D/评选|,|黄山区全面启动首届百名优秀人才评选活动 +D/表彰|,|黄山市黄山区5名基层一线科技工作者获五一”表彰 +D/优秀人才评选|,|黄山市徽州区科协委员桂利权获评黄山市第三批专业技术拔尖人才 +D/表彰|,|黄山市徽州区表彰第三届青年科技奖获奖者 +D/评审|,|宁国市举办第十五届青少年科技创新大赛作品评审会 +D/评审|,|泾县召开青少年科技创新大赛2018年度表彰暨2019年度动员会 +D/表彰|,|泾县2016年度青少年科技创新大赛表彰暨2017年度动员大会召开 +D/评审|,|六安市金寨县开展2017年基层科普行动计划”项目评审 +D/示范|,|金寨县开展创建全省科普示范县工作专项督查 +D/表彰|,|六安市金安区举办第二届青少年科技创新大赛表彰暨第三届青少年科技创新大赛启动式 +D/表彰|,|马鞍山市花山区优秀科技工作者表彰会召开 +D/表彰|,|和县科协社会组织党建工作获表彰 +D/示范|,|和县科协组织召开全县水稻麦茬免耕直播千亩示范现场会 +D/示范|,|当涂县召开创建省科普示范县工作调度会 +D/示范|,|濉溪县科协举办优秀学术论文颁奖农村科普示范基地授牌仪式 +D/表彰|,|颍上县召开2015年全国农民科学素质网络知识竞赛总结表彰会 +D/表彰|,|天长市举办第五届青少年科技创新市长奖”表彰大会 +D/表彰|,|天长市委市政府表彰第二届千秋英才奖” +D/示范|,|天长市新增五所科普示范学校” +D/示范|,|天长市科普示范基地首家农业院士工作站挂牌成立 +D/示范|,|明光市开展防震减灾科普示范学校评选活动 +D/表彰,D/优秀人才评选,D/先进|,|全椒县评选表彰基层科普行动计划”全民科学素质工作”先进集体和先进个人 +D/先进|,|定远县科协获评全县六五”普法依法治理工作先进单位 +E/竞赛|,|肥西县首届青少年信息学竞赛圆满结束 +E/竞赛|,|肥东县举办第二届青少年机器人竞赛 +E/竞赛|,|肥东县召开2015年全国科普日暨农民科学素质网络竞赛活动动员会 +E/竞赛|,|巢湖市举办第十六届少儿智力七巧科技”系列竞赛活动 +E/竞赛|,|我省代表队荣获第十七届全国中小学生电脑制作活动机器人BOTBALL竞赛冠军 +E/全国科技工作者日|,|庐江县老科技工作者日诗歌朗诵会举办 +E/高层次人才联系服务|,|长丰县创新创业高层次人才协会成立 +E/竞赛|,|庐阳区举办第四届青少年机器人竞赛活动 +E/竞赛|,|合肥市庐阳区开展首届防震减灾知识微竞赛 +E/全国科技工作者日|,|怀远县科协召开全国科技工作者日”暨科学技术交流会 +E/征求意见和建议|,|安徽省科协集中深入19个县市区征求意见建议 +E/全国科技工作者日|,|桐城市突出五大主题部署全国科技工作者日”庆祝活动 +E/最美科技工作者|,|桐城市开展最美科技工作者”学习宣传活动 +E/成立科协组织|,|桐城师范高等专科学校成立科协 +E/乡镇街道三长制|,|黄山市休宁县科协推进三长制”落实加强基层组织建设 +E/全国科技工作者日|,|祁门县科协系列活动庆祝全国科技工作者日 +E/全国科技工作者日|,|黟县科协开展全国科技工作者日”系列活动之科技工作者回馈社会 +E/最美科技工作者|,|黄山区科协四个强化”抓实最美科技工作者”学习宣传 
+E/竞赛|,|2019年东至县中学生气象科普作品创作竞赛圆满结束 +E/全国科技工作者日|,|绩溪县科协多举措开展全国科技工作者日”活动 +E/竞赛|,|霍山县科协举办全民科学素质知识竞赛 +E/征求意见和建议|,|六安市金寨县出台加强全县青少年科技教育工作的意见 +E/竞赛|,|六安市裕安区科协开展全国农民科学素质知识竞赛 +E/全国科技工作者日|,|灵璧县科协系列活动庆祝全国科技工作者日” +E/全国科技工作者日|,|宿州市埇桥区科协系列活动庆祝全国科技工作者日” +E/竞赛|,|马鞍山市花山区举办2016年七巧科技竞赛 +E/全国科技工作者日|,|和县举办第三个全国科技工作者日系列活动 +E/会员日活动|,|濉溪县科协开展中国科协会员日活动 +E/全国科技工作者日|,|阜南县科协全国科技工作者日”寄语征集活动圆满结束 +E/全国科技工作者日|,|阜阳市临泉县科协开展新时代首个全国科技工作者日”系列活动 +E/竞赛|,|界首市科协召开全市公民科学素质网络知识竞赛”动员部署会 +E/会员日活动|,|颍上县科协开展中国科协会员日”活动 +E/竞赛|,|颍州区召开全国农民科学素质和省全民科学素质网络知识竞赛推进会 +E/竞赛|,|明光市科协组织开展农民科学素质网络知识竞赛 +E/竞赛|,|全椒县举办2019年全民科学素质知识竞赛 +E/竞赛|,|全国青少年航模竞赛安徽赛区在凤阳县闭幕 +E/征求意见和建议|,|定远县科协召开科协系统深化改革实施方案征求意见座谈会 +E/征求意见和建议|,|定远县科协学习贯彻关于加强和改进党的群团工作的意见 +F/科技创新|,|肥西县召开第四届青少年科技创新县长奖颁奖大会 +F/科技节|,|肥西县召开第三届科技节暨肥西县第七届青少年航空航天航海车辆建筑模型锦标赛 +F/科技创新|,|肥西县召开第三届青少年科技创新县长奖颁奖大会 +F/科技创新|,|肥西县开展第八届青少年科技创新大赛科幻画评比 +F/科技节|,|肥西县第二届青少年科技节暨第六届青少年航空航天航海车辆建筑模型锦标赛举办 +F/科技创新|,|肥西县第二届青少年科技创新县长奖颁奖会召开 +F/工作站|,|合肥市农学会在肥西金牛蚕桑合作社设立专家工作站 +F/科技节|,|肥西县首届青少年科技节开幕 +F/科技创新|,|肥西县举办首届青少年科技创新县长奖颁奖会 +F/科技创新|,|肥东县召开第二届青少年科技创新县长奖颁奖大会 +F/科技节|,|肥东桥头集学校举办首届环保科技节活动 +F/科技创新|,|肥东县举办首届青少年科技创新大赛 +F/调研指导|,|肥东县科协检查指导白龙镇科协工作 +F/调研指导|,|中国老科协调研组调研指导肥东县老科协工作 +F/学会创新驱动服务站|,|巢湖市青少年创客科技教育协会成立 +F/论坛|,|省科协召开皖台科技论坛项目对接交流会 +F/科技创新|,|巢湖市科协举办灯塔社区青少年科幻画比赛 +F/科技创新|,|庐江县第十二届青少年科技创新大赛科幻画类作品评选结束 +F/科技创新|,|庐江县第五届青少年科技创新县长奖暨第十一届青少年科技创新大赛颁奖大会举行 +F/科技创新|,|庐江县第四届青少年科技创新县长奖暨第十届青少年科技创新大赛颁奖会召开 +F/科技创新|,|庐江县科协举办2016年度科普知识科技创新进校园”巡展活动 +F/论坛|,|2016中国·安徽健康产业高峰论坛举办 +F/科技创新|,|庐江县开展科普知识科技创新进校园”巡展活动 +F/科技创新|,|长丰县科技馆成为一中学生科技创新活动基地 +F/培训知识与技能普及培训|,|长丰县科技馆开展第四期创客培训班 +F/推广|,|长丰县瓜蒌种植协会开展技术推广培训 +F/科技创新|,|感受科技创新的力量 长丰县科技馆机器人亮相芜湖科博会 +F/论坛|,|长丰县造甲乡龙虾协举办龙虾产业与乡村旅游发展论坛” +F/调研指导|,|长丰县领导深入县科协调研指导工作 +F/科技创新|,|合肥市瑶海区颁发第三届青少年科技创新区长奖 +F/科技创新|,|合肥市庐阳区首届青少年科技创新区长奖颁奖会召开 +F/科技创新|,|合肥市蜀山区第九届青少年科技创新区长奖颁奖会召开 +F/科技创新|,|合肥市蜀山区举行第八届青少年科技创新区长奖颁奖仪式 +F/创新驱动助力工程|,|省科协组织专家赴芜湖县开展中国科协创新驱动助力工程示范项目实施工作 +F/培训知识与技能普及培训|,|芜湖市科协举办2017年全市农技协转型升级培训班 +F/对接项目|,|中国可持续发展研究会暨中国生物多样性保护与绿色发展基金会到芜湖开展创新驱动助力对接活动 +F/创新驱动助力工程知识与技能普及培训|,|安徽省科协创新驱动助力工程推进会 暨学会业务培训会在芜湖召开 
+F/服务站|,|中国可持续发展研究会南陵服务站”揭牌仪式举行 +F/科技创新|,|无为县举行第八届青少年科技创新大赛开幕式暨第三届青少年科技创新县长奖颁奖典礼 +F/论坛|,|芜湖市镜湖区成功举办第二届合芜蚌”青少年机器人邀请赛暨首届人工智能+教育”高峰论坛 +F/对接会|,|凤台-石台科协系统精准扶贫对接会召开 +F/科技创新|,|蚌埠市龙子湖区首届区青少年科技创新奖揭晓 +F/科技创新|,|宿松县召开青少年科技创新工作推进会 +F/科技创新|,|宿松县召开科技创新智库建设工作推进会 +F/调研指导|,|太湖县科协深入乡镇督查指导全民科学素质纲要考核工作 +F/科技创新|,|岳西县开展2018年青少年科技创新大赛作品评选 +F/科技节|,|迎江区四照园小学校园科技节赋予六一”新内涵 +F/调研指导|,|歙县科协深入雄村学校指导开展2019年科学调查体验活动 +F/科技创新|,|祁门县举办邦耀电子杯”中小学生科技创新大赛 +F/调研指导|,|祁门县科协组织农技人员走进果园指导果农管理果树 +F/调研指导|,|祁门县塔坊镇科协积极组织农技人员深入田间地头指导农户进行油菜安全越冬防冻管理 +F/调研指导|,|祁门县组织科技工作队下乡进村指导农民抗灾自救 +F/科技创新|,|祁门县2016年中小学生科技创新大赛举办 +F/科技创新|,|黄山市屯溪区2019年青少年科技创新大赛落下帷幕 +F/科技节|,|黄山市黄山区举行2015年全国科普日主场活动暨第二届青少年科技节活动启动仪式 +G/科学普及,G/知识宣传|,|肥西县紫蓬镇举办慢性病健康知识进社区活动 基层组织 +G/科学普及,G/知识宣传|,|肥西县紫蓬镇举行2019年全国科普日”启动仪式暨宣传活动 +G/知识与技能普及培训|,|肥西县紫蓬镇举办特色种养业技术培训班 +G/科学普及|,|肥西县三河镇举行2019年全国科普日启动仪式 +G/科学普及|,|肥西县科普讲师团专家走进三河镇 +G/科学普及,G/知识宣传|,|合肥市肥西县紫蓬镇开展健康科普知识竞赛活动 +G/科学普及|,|肥西县上派镇肥光社区开展暑期科普进社区”活动 +G/知识与技能普及培训|,|肥西县花木协会举办乡村企业家实用人才培训班 +G/科学普及|,|肥西县派河社区开展暑期科普兴趣班” +G/全民科学素质工作领导小组会议|,|合肥市召开2019年全民科学素质工作领导小组会议 +G/科学普及,G/知识宣传|,|肥西县柿树岗乡科协开展禁毒科普宣讲活动 +G/专家乡村学堂讲科普|,|百名专家乡村学堂讲科普”活动走进三河镇中心学校 +G/科学普及,G/知识宣传|,|肥西县上派镇科协开展世界地球日”科普主题活动 +G/科学普及|,|肥西县科协开展2019年科普赶集”活动 +G/科普基地建设|,|肥西县地震科普馆通过验收 +G/科学普及|,|肥西县上派镇科协开展农村少儿爱科学”科普活动 +G/科学普及|,|肥西县南郢社区开展科普系列活动 +G/科普基地建设|,|肥西县科协开展科普画廊验收工作 +G/科学普及,G/知识宣传|,|肥西县上派镇紫蓬社区开展主题科普活动 +G/科学普及,G/知识讲座|,|肥西县科协举办安徽省科协科学传播专家团全省巡讲活动暨肥西县科协委员读书班 +G/科技辅导员培训|,|肥西县科协举办青少年科技创新大赛辅导员培训班 +G/科技辅导员培训|,|庐江县食用菌协会培训农村食用菌特色循环种植技术扶贫实用人才 +G/科学普及|,|肥西县开展2018年全国科普日启动仪式暨主场活动 +G/知识与技能普及培训|,|肥西县举办苗木花卉产业实用人才技能提升培训班 +G/科学普及,G/知识讲座|,|肥东县长临河镇苗木协会开展科普大讲堂活动 +G/知识与技能普及培训|,|肥西县新型职业农民水稻产业培训班开班 +G/知识与技能普及培训|,|关于转发中国科协科普部关于2017年科普人员培训班报名的通知的通知 +G/科技活动周|,|肥西县科协组织留守儿童赴中科大参加科技周活动 +G/知识与技能普及培训|,|肥西县举办企业创新方法培训活动 +G/科技馆|,|关于做好2017年度中国流动科技馆巡展工作的通知 +G/科普基地建设|,|肥西县紫蓬山管委会科协推动科普画廊建设 +G/知识与技能普及培训|,|肥东县长临河镇苗木协会举办2016年秋季苗木专业技术培训班 +G/科学普及|,|肥西县养猪协会开展2016年全国科普日活动 +G/知识讲座|,|肥西县举办领导干部大讲堂 +G/科技辅导员培训|,|肥西县珍稀食用菌协会开展农村实用人才培训 +G/知识讲座|,|肥西县苗木信息与造型技术大讲堂开班 +G/知识宣传,G/科学普及|,|肥西县科协开展秸秆禁烧”科普宣传 +G/科普基地建设|,|肥西县科协规范社区科普大学教学点建设 
+G/专家乡村学堂讲科普|,|百名专家乡村学堂讲科普”走进肥西长镇中学 +G/科普基地建设|,|肥西县科协推进社区科普大学建设 +G/科普志愿活动|,|肥西县科协学雷锋志愿服务活动”启动 +G/科学普及|,|肥东县白龙镇举办2019年全国科普日科普进校园”活动 +G/科学普及|,|肥东县长临河镇开展2019年全国科普日活动 +G/知识讲座|,|肥东县长临社区科普大学举办中老年营养与膳食讲座 +G/知识宣传|,|肥东县元疃镇开展禁毒科普宣传活动 +G/知识讲座|,|肥东县白龙镇科协开展畜禽养殖业病害防治知识讲座 +G/科学普及|,|肥东县科协召开科普工作推进会 +G/科学普及|,|肥东县光大社区开展趣味小实验”儿童科普活动 +G/科学普及,G/知识讲座|,|肥东县科普大讲堂走进白龙镇双庙社区 +H/精准扶贫,H/座谈会|,|肥西县科协召开扶贫对口联系工作座谈会 +H/走访调研|,|杭州市科协来合肥市调研科普工作 +H/精准扶贫|,|精准扶贫健康科普进乡村活动仪式在肥西丰乐镇举办 +H/精准扶贫|,|合肥市科协到临泉阜南对接科技助力精准扶贫工程 +H/脱贫|,|霍邱县和肥东县科协为贫困学生开展科学文化之旅及农村少儿爱科学活动 +H/脱贫,H/组织专家义诊|,|省医学保健养生研究会党委组织开展健康扶贫义诊活动 +H/帮扶|,|肥东县白龙镇科协为果蔬种植户送防寒保收秘诀” +H/走访调研|,|肥东县督查调研乡镇科协工作 +H/走访调研|,|肥东县科协部署农技协专项资金使用管理自查自纠和乡镇科协督查调研工作 +H/精准扶贫|,|肥东科协助推企业科技精准扶贫 +H/慰问|,|肥东县老科协慰问有突出贡献的优秀老科技工作者 +H/调查|,|肥东县开展青少年科学调查体验活动 +H/座谈会|,|巢湖市科协组织召开礼赞共和国 智慧新生活”科技工作者座谈会 +H/走访调研|,|魏军锋到巢湖市科协调研 +H/座谈会|,|合肥市科协召开学会工作座谈会 +H/慰问|,|庐江县开展走访慰问科技工作者代表活动 +H/组织专家义诊|,|全国第二十四届肿瘤防治抗癌周大型义诊活动庐江县站 +H/精准扶贫|,|庐江县食用菌协会召开精准扶贫工作推进会 +H/脱贫|,|庐江县科协开展脱贫攻坚工作 +H/慰问|,|庐江县食用菌协会开展春节慰问困难会员活动 +H/精准扶贫|,|庐江县食用菌协会精准扶贫工作受合肥市政协领导高度肯定 +H/灾后农业生产自救|,|庐江县食用菌协会积极开展灾后科普服务 +H/知识宣传,H/灾后农业生产自救|,|庐江县郭河镇科协积极开展灾后科普宣传 +H/慰问|,|长丰县领导慰问科技工作者代表 +H/慰问|,|蔡士祥到长丰县岗集镇看望省科协挂职干部慰问困难党员 +H/座谈会|,|长丰县召开青少年科技创新工作座谈会 +H/脱贫|,|长丰县科协送科技到包联贫困户 +H/走访调研|,|长丰县政协开展科普工作专题调研活动 +H/帮扶|,|王海彦赴长丰县岗集镇调研江淮分水岭对口帮扶工作 +H/走访调研|,|王洵赴长丰县岗集镇调研江淮分水岭综合开发治理工作 +H/帮扶|,|六安市裕安区科协赴合肥市包河区科技局开展扶贫结对帮扶活动 +H/调查|,|2018年芜湖市青少年科学调查体验活动启动 +H/调查|,|芜湖县赵桥小学举行2018年青少年科学调查体验活动启动仪式 +H/走访调研|,|中国科协农技中心农技协发展处来皖调研芜湖农技协工作 +H/精准扶贫|,|芜湖市科协发挥农技协优势科技助力精准扶贫 +H/调查|,|芜湖市2016年全国青少年科学调查体验活动启动仪式举行 +H/组织专家义诊|,|无为县护理学会开展纪念512国际护士节大型义诊活动 +H/帮扶|,|安庆市望江县科协赴芜湖市镜湖区对接帮扶工作 +H/精准扶贫|,|科技助力 精准扶贫” 合肥蜀山科协精准科普进寿县 +H/慰问|,|五河县领导看望慰问优秀科技工作者代表 +H/调查|,|桐城市东关小学2019青少年科学调查体验活动启动 +H/扶贫|,|桐城创新机制 提升科技扶贫实效 +H/慰问|,|桐城市领导走访慰问基层一线科技工作者 +H/走访调研|,|中国科协科普部基层处调研安庆市科普信息化落地应用工作 +H/走访调研|,|桐城市科协组队赴浙江余姚考察学习 +H/脱贫|,|桐城市科协召开脱贫攻坚推进会 +H/脱贫|,|桐城市科协党员活动日”开展脱贫攻坚入户走访 diff --git a/data/valid.csv b/data/valid.csv new file mode 100644 index 0000000..eefe754 --- /dev/null +++ b/data/valid.csv @@ -0,0 +1,63 @@ +label|,|ques 
+A/十九大,A/贯彻学习|,|桐城市科协召开学习贯彻党的十九大精神专题会 +A/十九大|,|桐城市科协组织收看党的十九大开幕式 +A/组织党员干部观看红色电影|,|桐城市科协组织机关全体党员干部观看榜样 +A/传达学习|,|桐城市科协传达学习省科协九届六次全委会议精神 +A/主题活动|,|宿松县开展低碳生活绿色出行”主题宣传活动 +A/代表大会|,|宿松县科协第三次代表大会召开 +A/代表大会|,|肥西县桃花镇召开科协第三次代表大会 +A/年度民主生活会|,|怀宁县科协党组召开讲重作”专题警示教育民主生活会 +A/组织党员干部参观|,|怀宁县科协组织党员科技工作者服务团开展听民声送技术活动 +B/会员大会|,|庐江县举办茶叶协会二届二次会员大会暨庐江县茶叶公共品牌培训会 +B/全委扩大会议|,|庐江县科协四届六次全委会议召开 +B/全委扩大会议|,|长丰县科协召开三届二次全委会议 +B/换届|,|宿松县召开科协系统深化改革和县乡科协换届工作部署会 +B/全委扩大会议|,|濉溪县科协召开四届九次全委扩大会议 +B/群团工作会议|,|全椒县科协认真贯彻落实全市群团工作者培训班会议精神 +B/全委扩大会议|,|来安县科协召开四届二次全委扩大会议 +B/群团工作会议|,|定远县科协学习贯彻省委群团工作会议精神 +C/文明实践科技与科普服务平台|,|安徽省科协 安徽省文明办关于开展新时代文明实践中心科技志愿服务工作的通知 +C/新时代文明实践科技与科普服务平台,C/志愿|,|宿松县新时代文明实践中心科技志愿服务队开展水源垃圾清理活动 +C/展览献爱心|,|太湖县中学生参加 2017年参观科技展览有奖征文暨科技夏令营”获佳绩 +D/示范|,|天长市科普示范基地首家农业院士工作站挂牌成立 +D/示范|,|明光市开展防震减灾科普示范学校评选活动 +D/表彰,D/优秀人才评选,D/先进|,|全椒县评选表彰基层科普行动计划”全民科学素质工作”先进集体和先进个人 +D/先进|,|定远县科协获评全县六五”普法依法治理工作先进单位 +D/表彰|,|马鞍山市花山区优秀科技工作者表彰会召开 +D/表彰|,|和县科协社会组织党建工作获表彰 +D/示范|,|和县科协组织召开全县水稻麦茬免耕直播千亩示范现场会 +D/示范|,|当涂县召开创建省科普示范县工作调度会 +D/示范|,|濉溪县科协举办优秀学术论文颁奖农村科普示范基地授牌仪式 +D/表彰|,|颍上县召开2015年全国农民科学素质网络知识竞赛总结表彰会 +D/表彰|,|天长市举办第五届青少年科技创新市长奖”表彰大会 +D/表彰|,|天长市委市政府表彰第二届千秋英才奖” +E/竞赛|,|肥西县首届青少年信息学竞赛圆满结束 +E/竞赛|,|肥东县举办第二届青少年机器人竞赛 +F/调研指导|,|祁门县组织科技工作队下乡进村指导农民抗灾自救 +F/科技创新|,|祁门县2016年中小学生科技创新大赛举办 +F/科技创新|,|黄山市屯溪区2019年青少年科技创新大赛落下帷幕 +F/科技创新|,|庐江县第五届青少年科技创新县长奖暨第十一届青少年科技创新大赛颁奖大会举行 +F/科技创新|,|庐江县第四届青少年科技创新县长奖暨第十届青少年科技创新大赛颁奖会召开 +F/科技创新|,|庐江县科协举办2016年度科普知识科技创新进校园”巡展活动 +F/科技节|,|黄山市黄山区举行2015年全国科普日主场活动暨第二届青少年科技节活动启动仪式 +G/科学普及,G/知识宣传|,|肥西县紫蓬镇举办慢性病健康知识进社区活动 基层组织 +G/科学普及,G/知识宣传|,|肥西县紫蓬镇举行2019年全国科普日”启动仪式暨宣传活动 +G/知识与技能普及培训|,|肥西县紫蓬镇举办特色种养业技术培训班 +G/科学普及|,|肥西县三河镇举行2019年全国科普日启动仪式 +G/科学普及|,|肥西县科普讲师团专家走进三河镇 +G/科学普及,G/知识宣传|,|合肥市肥西县紫蓬镇开展健康科普知识竞赛活动 +G/科学普及|,|肥西县科协开展2019年科普赶集”活动 +G/科普基地建设|,|肥西县地震科普馆通过验收 +G/科学普及|,|肥西县上派镇科协开展农村少儿爱科学”科普活动 +G/科学普及|,|肥西县南郢社区开展科普系列活动 +H/组织专家义诊|,|无为县护理学会开展纪念512国际护士节大型义诊活动 +H/帮扶|,|安庆市望江县科协赴芜湖市镜湖区对接帮扶工作 +H/精准扶贫|,|科技助力 精准扶贫” 合肥蜀山科协精准科普进寿县 +H/慰问|,|五河县领导看望慰问优秀科技工作者代表 +H/调查|,|桐城市东关小学2019青少年科学调查体验活动启动 +H/扶贫|,|桐城创新机制 提升科技扶贫实效 +H/慰问|,|桐城市领导走访慰问基层一线科技工作者 
+H/走访调研|,|中国科协科普部基层处调研安庆市科普信息化落地应用工作 +H/走访调研|,|桐城市科协组队赴浙江余姚考察学习 +H/脱贫|,|桐城市科协召开脱贫攻坚推进会 +H/脱贫|,|桐城市科协党员活动日”开展脱贫攻坚入户走访 \ No newline at end of file diff --git a/data_preprocess/__init__.py b/data_preprocess/__init__.py new file mode 100644 index 0000000..a1f6a11 --- /dev/null +++ b/data_preprocess/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:50 +# @author :Mo +# @function : \ No newline at end of file diff --git a/data_preprocess/data_excel2csv.py b/data_preprocess/data_excel2csv.py new file mode 100644 index 0000000..570f27b --- /dev/null +++ b/data_preprocess/data_excel2csv.py @@ -0,0 +1,65 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2021/05/25 20:35 +# @author : zh-atom +# @function: + +from keras_textclassification.data_preprocess.text_preprocess import load_json, save_json, txt_read +from keras_textclassification.conf.path_config import path_model_dir +from keras_textclassification.conf.path_config import path_train, path_valid, path_label, path_root +from tqdm import tqdm +import pandas as pd +import numpy as np +import json +import os +import re + +def removePunctuation(content): + """ + 文本去标点 + """ + punctuation = r"~!@#$%^&*()_+`{}|\[\]\:\";\-\\\='<>?,.,。、《》?;:‘“{【】}|、!@#¥%……&*()——+=-" + content = re.sub(r'[{}]+'.format(punctuation), '', content) + + if content.startswith(' ') or content.endswith(' '): + re.sub(r"^(\s+)|(\s+)$", "", content) + return content.strip() + +def excel2csv(): + labels = [] + trains = ['label|,|ques'] + data = pd.read_excel(os.path.dirname(path_train)+'/02-anhui.xlsx') + data = np.array(data) + data = data.tolist() + for s_list in data: + print(s_list) + label_tmp = removePunctuation(s_list[5]) + if ' ' in label_tmp: + train_tmp = [] + label_tmp = label_tmp.split(' ') + for i in label_tmp: + label = removePunctuation(s_list[4]) + '/' + removePunctuation(i) + labels.append(label) + train_tmp.append(label) + train = ','.join(train_tmp) + '|,|' + 
def data_kfold(path_org_data, k_fold_split=10, path_save_dir=""):
    """
    Split a corpus into one stratified train/valid pair via sklearn's StratifiedKFold.

    :param path_org_data: str, absolute path of the utf-8 csv corpus (columns: label, ques)
    :param k_fold_split: int, number of folds; every label needs at least this many rows
    :param path_save_dir: str, directory prefix where lq_train.csv / lq_valid.csv are written
    :return: None
    """
    frame = pd.read_csv(path_org_data, names=["label", "ques"], usecols=["label", "ques"])
    # Drop the first row: it is the original csv header repeated as data.
    questions = np.array(frame["ques"].values.tolist()[1:])
    targets = np.array(frame["label"].values.tolist()[1:])

    splitter = StratifiedKFold(n_splits=k_fold_split)
    # Only the first fold is kept; take it directly instead of loop-and-break.
    idx_train, idx_dev = next(iter(splitter.split(questions, targets)))

    tr_x, tr_y = questions[idx_train], targets[idx_train]
    de_x, de_y = questions[idx_dev], targets[idx_dev]
    print(len(set(tr_y)))
    print(len(set(de_y)))

    def _as_line(lab, que):
        # Normalize ascii commas to full-width so the output stays 2-column csv.
        return lab.replace(",", ",").strip() + "," + que.replace(",", ",").strip() + "\n"

    rows_train = [_as_line(tr_y[i], tr_x[i]) for i in range(len(tr_y))]
    rows_valid = [_as_line(de_y[i], de_x[i]) for i in range(len(de_y))]
    txt_write(["label,ques\n"] + rows_train, path_save_dir + "lq_train.csv")
    txt_write(["label,ques\n"] + rows_valid, path_save_dir + "lq_valid.csv")
txt_write(train_, path_save_dir + "train.csv", type='a+') + txt_write(val_, path_save_dir + "valid.csv", type='a+') + + + # 是否扰乱 + if use_shuffle: + trains = txt_read("train.csv") + valids = txt_read("valid.csv") + random.shuffle(trains) + random.shuffle(valids) + trains = [t + "\n" for t in trains] + valids = [v + "\n" for v in valids] + txt_write(['label,ques'+'\n'], path_save_dir + "train.csv") + txt_write(['label,ques'+'\n'], path_save_dir + "valid.csv") + txt_write(trains, path_save_dir + "train.csv", type='a+') + txt_write(valids, path_save_dir + "valid.csv", type='a+') + + + +if __name__ == '__main__': + + from keras_textclassification.conf.path_config import path_root + filepath = path_root + "/data/baidu_qa_2019/baike_qa_train.csv" # 原始语料 + k_fold_split = 10 + data_kfold(path_org_data=filepath, k_fold_split=10, path_save_dir=path_root+ "/data/baidu_qa_2019/") + # data_split_train_val_label(path_org_data=filepath, + # path_save_dir=path_root+ "/data/baidu_qa_2019/", + # count_num = 500000, use_shuffle = True) diff --git a/data_preprocess/generator_preprocess.py b/data_preprocess/generator_preprocess.py new file mode 100644 index 0000000..9e3a8b3 --- /dev/null +++ b/data_preprocess/generator_preprocess.py @@ -0,0 +1,364 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2019/11/2 21:08 +# @author : Mo +# @function: + + +from keras_textclassification.data_preprocess.text_preprocess import load_json, save_json, txt_read +from keras_textclassification.conf.path_config import path_model_dir +from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid, path_root +from tqdm import tqdm +import pandas as pd +import numpy as np +import json +import os + + +class PreprocessGenerator: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir 
+ 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = pred[i] + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_get_label_set(self, path): + # 首先获取label,set,即存在的具体类 + label_set = set() + len_all = 0 + file_csv = open(path, "r", encoding="utf-8") + for line in file_csv: + len_all += 1 + if len_all > 1: # 第一条是标签'label,ques',不选择 + line_sp = line.split(",") + label_org = str(line_sp[0]).strip().upper() + label_real = "NAN" if label_org=="" else label_org + label_set.add(label_real) + file_csv.close() + return label_set, len_all + + def preprocess_label_ques_to_idx(self, embedding_type, batch_size, path, embed, rate=1, epcoh=20): + label_set, len_all = self.preprocess_get_label_set(path) + # 获取label转index字典等, 如果label2index存在则不转换了, dev验证集合的时候用 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + # 读取数据的比例 + len_ql = 
int(rate * len_all) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len_all + + def process_line(line): + # 对每一条数据操作,获取label和问句index + line_sp = line.split(",") + ques = str(line_sp[1]).strip().upper() + label = str(line_sp[0]).strip().upper() + label = "NAN" if label == "" else label + que_embed = embed.sentence2idx(ques) + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label]] = 1 + return que_embed, label_zeros + for _ in range(epcoh): + while True: + file_csv = open(path, "r", encoding="utf-8") + cout_all_line = 0 + cnt = 0 + x, y = [], [] + # 跳出循环 + if len_ql < cout_all_line: + break + for line in file_csv: + cout_all_line += 1 + if cout_all_line > 1: # 第一条是标签'label,ques',不选择 + x_line, y_line = process_line(line) + x.append(x_line) + y.append(y_line) + cnt += 1 + if cnt == batch_size: + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(y) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + elif embedding_type == 'xlnet': + x_, y_ = x, np.array(y) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + x_all = [x_1, x_2, x_3] + else: + x_all, y_ = np.array(x), np.array(y) + + cnt = 0 + yield (x_all, y_) + x, y =[], [] + file_csv.close() + print("preprocess_label_ques_to_idx ok") + + + +class PreprocessSimGenerator: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = pred[i] + pred_i2l_rank = 
[sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_get_label_set(self, path): + # 首先获取label,set,即存在的具体类 + label_set = set() + len_all = 0 + file_csv = open(path, "r", encoding="utf-8") + for line in file_csv: + len_all += 1 + data = json.loads(line) + label_real = data['label'] + label_set.add(label_real) + file_csv.close() + return label_set, len_all + + def preprocess_label_ques_to_idx_old(self, embedding_type, batch_size, path, embed, rate=1, epcoh=20): + label_set, len_all = self.preprocess_get_label_set(path) + # 获取label转index字典等, 如果label2index存在则不转换了, dev验证集合的时候用 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + # 读取数据的比例 + len_ql = int(rate * len_all) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len_all + + def process_line(line): + # 对每一条数据操作,获取label和问句index + data = json.loads(line) + label = data['label'] + ques_1 = data['sentence1'] + ques_2 = data['sentence2'] + offset = data['offset'] + mention = data["mention"] + offset_i = int(offset) + # if data.get("label_l2i"): + # ques_entity = data.get("label_l2i") + "#" + ques_1[:offset_i] + "#" + mention + "#" + 
ques_1[offset_i+len(mention):] + # else: + # ques_entity = ques_1[:offset_i] + "#" + mention + "#" + ques_1[offset_i+len(mention):] + "$$" + ques_2 + # que_embed = embed.sentence2idx(text=ques_entity) + que_embed = embed.sentence2idx(ques_1, second_text=ques_2) + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label]] = 1 + return que_embed, label_zeros + + for _ in range(epcoh): + while True: + file_csv = open(path, "r", encoding="utf-8") + cout_all_line = 0 + cnt = 0 + x, y = [], [] + # 跳出循环 + if len_ql < cout_all_line: + break + for line in file_csv: + cout_all_line += 1 + x_line, y_line = process_line(line) + x.append(x_line) + y.append(y_line) + cnt += 1 + if cnt == batch_size: + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(y) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + elif embedding_type == 'xlnet': + x_, y_ = x, np.array(y) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + x_all = [x_1, x_2, x_3] + else: + x_all, y_ = np.array(x), np.array(y) + + cnt = 0 + yield (x_all, y_) + x, y =[], [] + file_csv.close() + print("preprocess_label_ques_to_idx ok") + + def preprocess_label_ques_to_idx(self, embedding_type, batch_size, path, embed, rate=1, epcoh=20): + label_set, len_all = self.preprocess_get_label_set(path) + # 获取label转index字典等, 如果label2index存在则不转换了, dev验证集合的时候用 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + # 读取数据的比例 + len_ql = int(rate * len_all) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len_all 
+ + def process_line(line): + # 对每一条数据操作,获取label和问句index + data = json.loads(line) + label = data['label'] + ques_1 = data['sentence1'] + ques_2 = data['sentence2'] + offset = data['offset'] + mention_1 = data["mention"] + offset_i = int(offset) + que_embed_1 = embed.sentence2idx(text=ques_1) + que_embed_2 = embed.sentence2idx(text=ques_2) + """ques1""" + [input_id_1, input_type_id_1, input_mask_1] = que_embed_1 + input_start_mask_1 = [0] * len(input_id_1) + input_start_mask_1[offset_i] = 1 + input_end_mask_1 = [0] * len(input_id_1) + input_end_mask_1[offset_i + len(mention_1) - 1] = 1 + input_entity_mask_1 = [0] * len(input_id_1) + input_entity_mask_1[offset_i:offset_i + len(mention_1)] = [1] * len(mention_1) + """ques2""" + [input_id_2, input_type_id_2, input_mask_2] = que_embed_2 + kind_2 = [0] * len(input_type_id_2) + kind_21 = [0] * len(input_type_id_2) + que_2_sp = ques_2.split("|") + if len(que_2_sp)>=2: + que_2_sp_sp = que_2_sp[0].split(":") + if len(que_2_sp_sp)==2: + kind_2_start = len(que_2_sp_sp[0]) - 1 + kind_2_end = kind_2_start + len(que_2_sp_sp[1]) - 1 + kind_2[kind_2_start:kind_2_end] = [1] * (kind_2_end - kind_2_start) + if "标签:" in que_2_sp[1]: + que_21_sp_sp = que_2_sp[1].split(":") + kind_21_start = len(que_2_sp[0]) + len(que_21_sp_sp[0]) - 1 + kind_21_end = len(que_2_sp[0]) + len(que_21_sp_sp[0]) + len(que_21_sp_sp[1]) - 1 + kind_21[kind_21_start:kind_21_end] = [1] * (kind_21_end - kind_21_start) + que_embed_x=[input_id_1, input_type_id_1, input_mask_1, input_start_mask_1, input_end_mask_1, input_entity_mask_1, + input_id_2, input_type_id_2, input_mask_2, kind_2, kind_21] + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label]] = 1 + return que_embed_x, label_zeros + + for _ in range(epcoh): + while True: + file_csv = open(path, "r", encoding="utf-8") + cout_all_line = 0 + cnt = 0 + x, y = [], [] + # 跳出循环 + if len_ql < cout_all_line: + break + for line in file_csv: + cout_all_line += 1 + x_line, y_line = 
process_line(line) + x.append(x_line) + y.append(y_line) + cnt += 1 + if cnt == batch_size: + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(y) + x_all = [] + for i in range(len(x_[0])): + x_1 = np.array([x[i] for x in x_]) + x_all.append(x_1) + elif embedding_type == 'xlnet': + x_, y_ = x, np.array(y) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + x_all = [x_1, x_2, x_3] + else: + x_all, y_ = np.array(x), np.array(y) + + cnt = 0 + yield (x_all, y_) + x, y =[], [] + file_csv.close() + print("preprocess_label_ques_to_idx ok") diff --git a/data_preprocess/text_preprocess.py b/data_preprocess/text_preprocess.py new file mode 100644 index 0000000..230d3bc --- /dev/null +++ b/data_preprocess/text_preprocess.py @@ -0,0 +1,860 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/5 21:36 +# @author :Mo +# @function :data utils of text classification + + +# from keras_textclassification.conf.path_config import path_model_dir +# path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' +# path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' +from collections import Counter +from tqdm import tqdm +import pandas as pd +import numpy as np +import random +# import jieba +import json +import re +import os + + +__all__ = ["PreprocessText", "PreprocessTextMulti", "PreprocessSim"] + +__tools__ = ["txt_read", "txt_write", "extract_chinese", "read_and_process", + "preprocess_label_ques", "save_json", "load_json", "delete_file", + "transform_multilabel_to_multihot"] + + +def txt_read(file_path, encode_type='utf-8'): + """ + 读取txt文件,默认utf8格式 + :param file_path: str, 文件路径 + :param encode_type: str, 编码格式 + :return: list + """ + list_line = [] + try: + file = open(file_path, 'r', encoding=encode_type) + while True: + line = file.readline() + line = line.strip() + if not line: + break + list_line.append(line) + file.close() + except Exception as e: + 
def txt_write(list_line, file_path, type='w', encode_type='utf-8'):
    """
    Write a list of lines to a text file.

    :param list_line: list, lines to write; each entry should already end with "\n"
    :param file_path: str, destination file path
    :param type: str, file open mode, e.g. 'w' to overwrite or 'a+' to append
    :param encode_type: str, text encoding used for the file
    :return: None
    """
    try:
        # FIX: use a context manager so the handle is closed even if
        # writelines() raises; the original left it open on error.
        with open(file_path, type, encoding=encode_type) as file:
            file.writelines(list_line)
    except Exception as e:
        # Keep the module's best-effort behavior: report and continue.
        print(str(e))
def get_ngram(text, ns=None):
    """
    Build character n-gram features of *text* for every window size in *ns*.

    :param text: str, input sentence
    :param ns: list of int window sizes (default [1]); each must be >= 1
    :return: list of n-gram strings; for any n longer than the text, the whole
             text itself is used as the single gram
    :raises RuntimeError: when ns is not a list or contains a size < 1
    """
    if ns is None:
        ns = [1]  # FIX: avoid the mutable-default-argument pitfall of ns=[1]
    if not isinstance(ns, list):  # FIX: isinstance over type(...) != list
        raise RuntimeError("ns of function get_ngram() must be list!")
    for n in ns:
        if n < 1:
            # FIX: message corrected — the check rejects sizes below 1.
            raise RuntimeError("enum of ns must '>=1'!")
    len_text = len(text)
    ngrams = []
    for n in ns:
        # Slide a window of width n over the text, stopping at the last full window.
        ngram_n = [text[i:i + n] for i in range(len_text - n + 1)]
        if not ngram_n:
            # n exceeds the text length: fall back to the whole text.
            ngram_n = [text]
        ngrams += ngram_n
    return ngrams
+ """ + result = np.zeros(label) + result[sample] = 1 + res = result.tolist() + # res = ''.join([str(r) for r in res]) + return res + + +class PreprocessText: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred, digits=5): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = round(float(pred[i]), digits) + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True, graph=None): + data = pd.read_csv(path) + ques = data['ques'].tolist() + label = data['label'].tolist() + ques = [str(q).upper() for q in ques] + label = [str(l).upper() for l in label] + if shuffle: + ques = np.array(ques) + label = np.array(label) + indexs = [ids for ids in range(len(label))] + random.shuffle(indexs) + ques, label = ques[indexs].tolist(), label[indexs].tolist() + # 如果label2index存在则不转换了 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + label_set = set(label) + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + 
label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + len_ql = int(rate * len(ques)) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len(ques) + + x = [] + print("ques to index start!") + ques_len_ql = ques[0:len_ql] + for i in tqdm(range(len_ql)): + que = ques_len_ql[i] + que_embed = embed.sentence2idx(que) + x.append(que_embed) # [[], ] + label_zo = [] + print("label to onehot start!") + label_len_ql = label[0:len_ql] + for j in tqdm(range(len_ql)): + label_one = label_len_ql[j] + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label_one]] = 1 + label_zo.append(label_zeros) + + count = 0 + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(label_zo) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + return x_all, y_ + elif embedding_type == 'xlnet': + count += 1 + if count == 1: + x_0 = x[0] + print(x[0][0][0]) + x_, y_ = x, np.array(label_zo) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + if embed.trainable: + x_4 = np.array([x[3][0] for x in x_]) + x_all = [x_1, x_2, x_3, x_4] + else: + x_all = [x_1, x_2, x_3] + return x_all, y_ + else: + x_, y_ = np.array(x), np.array(label_zo) + return x_, y_ + + +class PreprocessTextMulti: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred, digits=5): + if 
os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = round(float(pred[i]), digits) + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True): + if type(path) == str: + label_ques = txt_read(path) + ques = list() + label = list() + for lq in label_ques[1:]: + lqs = lq.split('|,|') + ques.append(lqs[1]) + label.append(lqs[0]) + elif type(path) == list and ',' in path[0]: + label = [label_ques.split(',')[0] for label_ques in path] + ques = [label_ques.split(',')[1] for label_ques in path] + else: + raise RuntimeError('type of path is not true!') + + len_ql = int(rate * len(ques)) + if len_ql <= 50: # 数量较少时候全取, 不管rate + len_ql = len(ques) + ques = ques[: len_ql] + label = label[: len_ql] + print('rate ok!') + + ques = [str(q).strip().upper() for q in ques] + + if shuffle: + ques = np.array(ques) + label = np.array(label) + indexs = [ids for ids in range(len(label))] + random.shuffle(indexs) + ques, label = ques[indexs].tolist(), label[indexs].tolist() + + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + from keras_textclassification.conf.path_config import path_label + byte_multi_news_label = txt_read(path_label) + byte_multi_news_label = [i.strip().upper() for i in byte_multi_news_label] + + label_set = set(byte_multi_news_label) + len_label_set = len(label_set) + count = 0 + 
label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + len_label_set = len(l2i_i2l['l2i']) + + + x = [] + print("ques to index start!") + for i in tqdm(range(len_ql)): + que = ques[i] + que_embed = embed.sentence2idx(que) + x.append(que_embed) # [[], ] + + print('que_embed ok!') + + # 转化为多标签类标 + label_multi_list = [] + count = 0 + print("label to onehot start!") + for j in tqdm(range(len_ql)): + l = label[j] + count += 1 + label_single = str(l).strip().upper().split(',') + label_single_index = [l2i_i2l['l2i'][ls] for ls in label_single] + label_multi = transform_multilabel_to_multihot(label_single_index, label=len_label_set) + label_multi_list.append(label_multi) + + print('label_multi_list ok!') + count = 0 + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(label_multi_list) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + return x_all, y_ + elif embedding_type == 'xlnet': + count += 1 + if count == 1: + x_0 = x[0] + print(x[0][0][0]) + x_, y_ = x, np.array(label_multi_list) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + x_all = [x_1, x_2, x_3] + return x_all, y_ + else: + x_, y_ = np.array(x), np.array(label_multi_list) + return x_, y_ + + +class PreprocessSim: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) 
+ + def prereocess_idx(self, pred, digits=5): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = round(float(pred[i]), digits) + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True): + data = pd.read_csv(path) + ques_1 = data['sentence1'].tolist() + ques_2 = data['sentence2'].tolist() + label = data['label'].tolist() + ques_1 = [str(q1).upper() for q1 in ques_1] + ques_2 = [str(q2).upper() for q2 in ques_2] + + label = [str(l).upper() for l in label] + if shuffle: + ques_1 = np.array(ques_1) + ques_2 = np.array(ques_2) + label = np.array(label) + indexs = [ids for ids in range(len(label))] + random.shuffle(indexs) + ques_1, ques_2, label = ques_1[indexs].tolist(), ques_2[indexs].tolist(), label[indexs].tolist() + # 如果label2index存在则不转换了 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + label_set = set(label) + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + len_ql = int(rate * len(label)) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len(label) + + 
x = [] + print("ques to index start!") + for i in tqdm(range(len_ql)): + que_1 = ques_1[i] + que_2 = ques_2[i] + que_embed = embed.sentence2idx(text=que_1, second_text=que_2) + x.append(que_embed) # [[], ] + label_zo = [] + print("label to onehot start!") + label_len_ql = label[0:len_ql] + for j in tqdm(range(len_ql)): + label_one = label_len_ql[j] + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label_one]] = 1 + label_zo.append(label_zeros) + + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(label_zo) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + return x_all, y_ + + +class PreprocessSimCCKS2020baidu: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = pred[i] + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_label_ques_to_idx(self, embedding_type, path, embed, + rate=1, shuffle=True, graph=None): + if "json" in path: + datas = txt_read(path) + ques_1 = 
[] + ques_2 = [] + label = [] + offset = [] + mention = [] + for data_str in datas: + data = json.loads(data_str) + ques_1 += [data['sentence1']] + ques_2 += [data['sentence2']] + mention += [data['mention']] + label += [data['label']] + offset += [data['offset']] + elif "csv" in path: + data = pd.read_csv(path) + ques_1 = data['sentence1'].tolist() + ques_2 = data['sentence2'].tolist() + label = data['label'].tolist() + offset = data['offset'].tolist() + + ques_1 = [str(q1).upper() for q1 in ques_1] + ques_2 = [str(q2).upper() for q2 in ques_2] + + # label = [str(l).upper() for l in label] + label = [str(l) for l in label] + if shuffle: + ques_1 = np.array(ques_1) + ques_2 = np.array(ques_2) + label = np.array(label) + mention = np.array(mention) + offset = np.array(offset) + + indexs = [ids for ids in range(len(label))] + random.shuffle(indexs) + ques_1 = ques_1[indexs].tolist() + ques_2 = ques_2[indexs].tolist() + label = label[indexs].tolist() + mention = mention[indexs].tolist() + offset = offset[indexs].tolist() + # 如果label2index存在则不转换了 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + label_set = set(label) + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + len_ql = int(rate * len(label)) + if len_ql <= 1: # sample时候不生效,使得语料足够训练 + len_ql = len(label) + + x = [] + print("ques to index start!") + for i in tqdm(range(len_ql)): + que_1 = ques_1[i] + que_2 = ques_2[i] + mention_1 = mention[i] + # que_embed = embed.sentence2idx(text=que_1, second_text=que_2) + # x.append(que_embed) # [[], ] + offset_i = int(offset[i]) + # ques_entity = que_1 + "##" + que_1[offset_i+len(que_2):] + # ques_entity = que_1 + # que_embed1 = 
embed.sentence2idx(text=que_1, second_text=que_2) + if embedding_type in ['bert', 'albert']: + ########################################1111111############## + # [input_id, input_type_id] = que_embed + # input_entity_mask = [0] * len(input_id) + # input_entity_mask[offset_i:offset_i+len(que_2)] = [1] * len(que_2) + # # x.append(que_embed) # [[], ] + # x.append([input_id, input_type_id, input_entity_mask]) + # # x.append([input_id, input_type_id, input_entity_mask, offset_i]) + ########################################2222222指针网络###################################### + # [input_id, input_type_id] = que_embed + # input_start_mask = [0] * len(input_id) + # input_start_mask[offset_i] = 1 + # input_end_mask = [0] * len(input_id) + # input_end_mask[offset_i + len(mention_1) - 1] = 1 + # x.append([input_id, input_type_id, input_start_mask, input_start_mask]) + ########################################分开两个句子################################################### + que_embed_1 = embed.sentence2idx(text=que_1) + # que_embed_1 = [que[:54] for que in que_embed_1] + + que_embed_2 = embed.sentence2idx(text=que_2) + # que_embed_2 = [que[:256-54] for que in que_embed_2] + try: + """ques1""" + [input_id_1, input_type_id_1, input_mask_1] = que_embed_1 + input_start_mask_1 = [0] * len(input_id_1) + input_start_mask_1[offset_i] = 1 + input_end_mask_1 = [0] * len(input_id_1) + input_end_mask_1[offset_i+len(mention_1)-1] = 1 + input_entity_mask_1 = [0] * len(input_id_1) + input_entity_mask_1[offset_i:offset_i+len(mention_1)] = [1] * len(mention_1) + """ques2""" + [input_id_2, input_type_id_2, input_mask_2] = que_embed_2 + kind_2 = [0] * len(input_type_id_2) + que_2_sp = que_2.split("|") + que_2_sp_sp = que_2_sp[0].split(":") + kind_2_start = len(que_2_sp_sp[0]) - 1 + kind_2_end = kind_2_start + len(que_2_sp_sp[1]) - 1 + kind_2[kind_2_start:kind_2_end] = [1] * (kind_2_end-kind_2_start) + kind_21 = [0] * len(input_type_id_2) + if "标签" in que_2_sp[1]: + que_21_sp_sp = que_2_sp[1].split(":") + 
kind_21_start = len(que_2_sp[0]) + len(que_21_sp_sp[0]) - 1 + kind_21_end = len(que_2_sp[0]) + len(que_21_sp_sp[0]) + len(que_21_sp_sp[1]) - 1 + kind_21[kind_21_start:kind_21_end] = [1] * (kind_21_end - kind_21_start) + except Exception as e: + print(str(e)) + gg = 0 + + x.append([input_id_1, input_type_id_1, input_mask_1, input_start_mask_1, input_end_mask_1, input_entity_mask_1, + input_id_2, input_type_id_2, input_mask_2, kind_2, kind_21]) + + + elif embedding_type == 'xlnet': + if embed.trainable: + [token_input, segment_input, memory_length_input, mask_input] = que_embed + input_entity_mask = [0] * len(token_input) + input_entity_mask[offset_i:offset_i + len(que_2)] = [1] * len(que_2) + # x.append(que_embed) # [[], ] + x.append([token_input, segment_input, memory_length_input, mask_input, input_entity_mask]) + else: + [token_input, segment_input, memory_length_input] = que_embed + input_entity_mask = [0] * len(token_input) + input_entity_mask[offset_i:offset_i + len(que_2)] = [1] * len(que_2) + x.append([token_input, segment_input, memory_length_input, input_entity_mask]) + + label_zo = [] + print("label to onehot start!") + label_len_ql = label[0:len_ql] + for j in tqdm(range(len_ql)): + label_one = label_len_ql[j] + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label_one]] = 1 + label_zo.append(label_zeros) + + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(label_zo) + # x_1 = np.array([x[0] for x in x_]) + # x_2 = np.array([x[1] for x in x_]) + # x_3 = np.array([x[2] for x in x_]) + # x_4 = np.array([x[3] for x in x_]) + # x_all = [x_1, x_2, x_3, x_4] + x_all = [] + for i in range(len(x_[0])): + x_all.append(np.array([x[i] for x in x_])) + return x_all, y_ + elif embedding_type == 'xlnet': + x_, y_ = x, np.array(label_zo) + x_1 = np.array([x[0][0] for x in x_]) + x_2 = np.array([x[1][0] for x in x_]) + x_3 = np.array([x[2][0] for x in x_]) + x_4 = np.array([x[3][0] for x in x_]) + if embed.trainable: + x_5 = 
np.array([x[4][0] for x in x_]) + x_all = [x_1, x_2, x_3, x_4, x_5] + else: + x_all = [x_1, x_2, x_3, x_4] + return x_all, y_ + else: + x_, y_ = np.array(x), np.array(label_zo) + return x_, y_ + + +class PreprocessSimConv2019: + """ + 数据预处理, 输入为csv格式, [label,ques] + """ + def __init__(self, path_model_dir): + self.l2i_i2l = None + self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' + self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + if os.path.exists(self.path_fast_text_model_l2i_i2l): + self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + def prereocess_idx(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_i2l = {} + i2l = self.l2i_i2l['i2l'] + for i in range(len(pred)): + pred_i2l[i2l[str(i)]] = pred[i] + pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] + return pred_i2l_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def prereocess_pred_xid(self, pred): + if os.path.exists(self.path_fast_text_model_l2i_i2l): + pred_l2i = {} + l2i = self.l2i_i2l['l2i'] + for i in range(len(pred)): + pred_l2i[pred[i]] = l2i[pred[i]] + pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] + return pred_l2i_rank + else: + raise RuntimeError("path_fast_text_model_label2index is None") + + def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True): + data = pd.read_csv(path) + # category, query1, query2, label + ques_1 = data['query1'].tolist() + category = data['category'].tolist() + ques_2 = data['query2'].tolist() + label = data['label'].tolist() + ques_1 = [str(q1).upper() for q1 in ques_1] + ques_2 = [str(q2).upper() for q2 in ques_2] + + label = [str(l).upper() for l in label] + if shuffle: + ques_1 = np.array(ques_1) + ques_2 = np.array(ques_2) + category = np.array(category) + label = np.array(label) + indexs = [ids for ids in range(len(label))] + random.shuffle(indexs) + ques_1, 
ques_2, label, category = ques_1[indexs].tolist(), ques_2[indexs].tolist(), label[indexs].tolist(), category[indexs].tolist() + # 如果label2index存在则不转换了 + if not os.path.exists(self.path_fast_text_model_l2i_i2l): + label_set = set(label) + count = 0 + label2index = {} + index2label = {} + for label_one in label_set: + label2index[label_one] = count + index2label[count] = label_one + count = count + 1 + + l2i_i2l = {} + l2i_i2l['l2i'] = label2index + l2i_i2l['i2l'] = index2label + save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) + else: + l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) + + len_ql = int(rate * len(label)) + if len_ql <= 500: # sample时候不生效,使得语料足够训练 + len_ql = len(label) + + x = [] + print("ques to index start!") + len_ques_list = [] + label_list = [] + for i in tqdm(range(len_ql)): + que_1 = ques_1[i] + que_2 = ques_2[i] + category_3 = category[i] + que_embed = embed.sentence2idx(text=category_3+":"+que_1, second_text=category_3+":"+que_2) + + # que_embed = embed.sentence2idx(text=category_3+":"+que_1, second_text=category_3+":"+que_2) + # que_embed = embed.sentence2idx(text=que_1, second_text=que_2) + x.append(que_embed) # [[], ] + len_ques_list.append(len(que_1+que_2)) + label_list.append(category_3) + len_ques_counter = Counter(len_ques_list) + label_counter = Counter(label_list) + print("长度:{}".format(dict(len_ques_counter))) + print("长度字典:{}".format(dict(len_ques_counter).keys())) + print("最大长度:{}".format(max(list(dict(len_ques_counter).keys())))) + print("类别字典:{}".format(dict(label_counter))) + label_zo = [] + print("label to onehot start!") + label_len_ql = label[0:len_ql] + for j in tqdm(range(len_ql)): + label_one = label_len_ql[j] + label_zeros = [0] * len(l2i_i2l['l2i']) + label_zeros[l2i_i2l['l2i'][label_one]] = 1 + label_zo.append(label_zeros) + + if embedding_type in ['bert', 'albert']: + x_, y_ = np.array(x), np.array(label_zo) + x_1 = np.array([x[0] for x in x_]) + x_2 = np.array([x[1] for x in x_]) + x_all = [x_1, x_2] + 
return x_all, y_ + else: + x_, y_ = np.array(x), np.array(label_zo) + + return x_, y_ + diff --git a/textCNN/__init__.py b/textCNN/__init__.py new file mode 100644 index 0000000..33539bb --- /dev/null +++ b/textCNN/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/7 22:09 +# @author :Mo +# @function : \ No newline at end of file diff --git a/textCNN/graph.py b/textCNN/graph.py new file mode 100644 index 0000000..dec7a91 --- /dev/null +++ b/textCNN/graph.py @@ -0,0 +1,176 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:51 +# @author :Mo +# @function :graph of base + + +from keras.layers import Reshape, Concatenate, Conv2D, MaxPool2D +from keras.layers import Dense, Dropout, Flatten +from keras.models import Model + +from keras_textclassification.base.graph import graph + + +class TextCNNGraph(graph): + def __init__(self, hyper_parameters): + """ + 初始化 + :param hyper_parameters: json,超参 + """ + super().__init__(hyper_parameters) + + def create_model(self, hyper_parameters): + """ + 构建神经网络 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding = self.word_embedding.output + embedding_reshape = Reshape((self.len_max, self.embed_size, 1))(embedding) + # 提取n-gram特征和最大池化, 一般不用平均池化 + conv_pools = [] + for filter in self.filters: + conv = Conv2D(filters = self.filters_num, + kernel_size = (filter, self.embed_size), + padding = 'valid', + kernel_initializer = 'normal', + activation = 'tanh', + )(embedding_reshape) + pooled = MaxPool2D(pool_size = (self.len_max - filter + 1, 1), + strides = (1, 1), + padding = 'valid', + )(conv) + conv_pools.append(pooled) + # 拼接 + x = Concatenate(axis=-1)(conv_pools) + x = Dropout(self.dropout)(x) + x = Flatten()(x) + x = Dense(units=64, activation='tanh')(x) + x = Dropout(self.dropout)(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = 
Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + + + # def focal_loss(self, gamma=2, alpha=0.75): # 0.25, 0.5 + def focal_loss(self, gamma=2, alpha=0.75, batch_size=None, label_num=None, epsilon=1.e-7, multi_dim=False, use_softmax=True): + from tensorflow.python.ops import array_ops + import keras.backend as K + import tensorflow as tf + def focal_loss_fixed(y_true, y_pred): # with tensorflow + eps = 1e-12 + y_pred = K.clip(y_pred, eps, 1. - eps) # improve the stability of the focal loss and see issues 1 for more information + pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred)) + pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred)) + loss = -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)) + return loss + + def focal_loss_all(prediction_tensor, target_tensor): + r"""Compute focal loss for predictions. + Multi-labels Focal loss formula: + FL = -alpha * (z-p)^gamma * log(p) -(1-alpha) * p^gamma * log(1-p) + ,which alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor. + Args: + prediction_tensor: A float tensor of shape [batch_size, num_anchors, + num_classes] representing the predicted logits for each class + target_tensor: A float tensor of shape [batch_size, num_anchors, + num_classes] representing one-hot encoded classification targets + weights: A float tensor of shape [batch_size, num_anchors] + alpha: A scalar tensor for focal loss alpha hyper-parameter + gamma: A scalar tensor for focal loss gamma hyper-parameter + Returns: + loss: A (scalar) tensor representing the value of the loss function + """ + sigmoid_p = tf.nn.sigmoid(prediction_tensor) + zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype) + + # For poitive prediction, only need consider front part loss, back part is 0; + # target_tensor > zeros <=> z=1, so poitive coefficient = z - p. 
+ pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros) + + # For negative prediction, only need consider back part loss, front part is 0; + # target_tensor > zeros <=> z=1, so negative coefficient = 0. + neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p) + per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \ + - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0)) + return tf.reduce_sum(per_entry_cross_ent) + + def focal_loss_category(logits, labels): + ''' + :param logits: [batch_size, n_class] + :param labels: [batch_size] not one-hot !!! + :return: -alpha*(1-y)^r * log(y) + 它是在哪实现 1- y 的? 通过gather选择的就是1-p,而不是通过计算实现的; + logits soft max之后是多个类别的概率,也就是二分类时候的1-P和P;多分类的时候不是1-p了; + + 怎么把alpha的权重加上去? + 通过gather把alpha选择后变成batch长度,同时达到了选择和维度变换的目的 + + 是否需要对logits转换后的概率值进行限制? + 需要的,避免极端情况的影响 + + 针对输入是 (N,P,C )和 (N,P)怎么处理? + 先把他转换为和常规的一样形状,(N*P,C) 和 (N*P,) + + bug: + ValueError: Cannot convert an unknown Dimension to a Tensor: ? + 因为输入的尺寸有时是未知的,导致了该bug,如果batchsize是确定的,可以直接修改为batchsize + + ''' + + if multi_dim: + logits = tf.reshape(logits, [-1, logits.shape[2]]) + labels = tf.reshape(labels, [-1]) + + # (Class ,1) + alpha = tf.constant([0.5]*batch_size, dtype=tf.float32) + + labels = tf.argmax(labels) # + labels = tf.cast(labels, dtype=tf.int32) + logits = tf.cast(logits, tf.float32) + if use_softmax: + # (N,Class) > N*Class + softmax = tf.reshape(tf.nn.softmax(logits), [-1]) # [batch_size * n_class] + else: + softmax = tf.reshape(tf.nn.sigmoid(logits), [-1]) # [batch_size * n_class] + # (N,) > (N,) ,但是数值变换了,变成了每个label在N*Class中的位置 + # labels_shift = tf.range(0, logits.shape[0]) * logits.shape[1] + labels + labels_shift = tf.range(0, label_num) * batch_size + labels + # (N*Class,) > (N,) + prob = tf.gather(softmax, labels_shift) + # 预防预测概率值为0的情况 ; (N,) + prob = tf.clip_by_value(prob, epsilon, 1. 
- epsilon) + # (Class ,1) > (N,) + alpha_choice = tf.gather(alpha, labels) + # (N,) > (N,) + weight = tf.pow(tf.subtract(1., prob), gamma) + weight = tf.multiply(alpha_choice, weight) + # (N,) > 1 + loss = -tf.reduce_sum(tf.multiply(weight, tf.log(prob))) + return loss + + return focal_loss_fixed + + + def create_compile(self): + """ + 构建优化器、损失函数和评价函数 + :return: + """ + from keras_textclassification.keras_layers.keras_radam import RAdam + from keras.optimizers import Adam + # self.model.compile(optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), + # loss=[self.focal_loss(alpha=.25, gamma=2)], + # metrics=['accuracy']) + + self.model.compile(optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), + loss=[self.focal_loss(alpha=.25, gamma=2)], # self.loss, # + # loss_weights=[0.6, 0.5], + # loss=[self.focal_loss(gamma=2, alpha=0.25, batch_size=self.batch_size, label_num=self.label, epsilon=1.e-7, multi_dim=False, use_softmax=False)], + # loss=[self.focal_loss(gamma=2, alpha=0.75)], + metrics=['accuracy']) # Any optimize + + diff --git a/textCNN/predict.py b/textCNN/predict.py new file mode 100644 index 0000000..93b4f8a --- /dev/null +++ b/textCNN/predict.py @@ -0,0 +1,130 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:51 +# @author :Mo +# @function :pred of text-cnn with baidu-qa-2019 in question title + + +# 适配linux +import pathlib +import sys +import os +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) +# 地址 +from keras_textclassification.conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters +# 训练验证数据地址 +from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid +# 数据预处理, 删除文件目录下文件 +from keras_textclassification.data_preprocess.text_preprocess import PreprocessText, read_and_process, load_json +# 模型图 +from keras_textclassification.m02_TextCNN.graph import TextCNNGraph as Graph +# 
模型评估 +from sklearn.metrics import classification_report +# 计算时间 +import time + +import numpy as np + + +def pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0): + # 测试集的准确率 + hyper_parameters = load_json(path_hyper_parameter) + if path_test: # 从外部引入测试数据地址 + hyper_parameters['data']['val_data'] = path_test + time_start = time.time() + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + graph.load_model() + print("graph load ok!") + ra_ed = graph.word_embedding + # 数据预处理 + pt = PreprocessText(path_model_dir) + y, x = read_and_process(hyper_parameters['data']['val_data']) + # 取该数据集的百分之几的语料测试 + len_rate = int(len(y) * rate) + x = x[1:len_rate] + y = y[1:len_rate] + y_pred = [] + count = 0 + for x_one in x: + count += 1 + ques_embed = ra_ed.sentence2idx(x_one) + + if hyper_parameters['embedding_type'] in ['bert', 'albert']: + x_val_1 = np.array([ques_embed[0]]) + x_val_2 = np.array([ques_embed[1]]) + x_val = [x_val_1, x_val_2] + elif hyper_parameters['embedding_type'] == 'xlnet': + x_val_1 = np.array([ques_embed[0]]) + x_val_2 = np.array([ques_embed[1]]) + x_val_3 = np.array([ques_embed[2]]) + x_val = [x_val_1, x_val_2, x_val_3] + else: + x_val = ques_embed + # 预测 + pred = graph.predict(x_val) + pre = pt.prereocess_idx(pred[0]) + label_pred = pre[0][0][0] + if count % 1000==0: + print(label_pred) + y_pred.append(label_pred) + + print("data pred ok!") + # 预测结果转为int类型 + index_y = [pt.l2i_i2l['l2i'][i] for i in y] + index_pred = [pt.l2i_i2l['l2i'][i] for i in y_pred] + target_names = [pt.l2i_i2l['i2l'][str(i)] for i in list(set((index_pred + index_y)))] + # 评估 + report_predict = classification_report(index_y, index_pred, + target_names=target_names, digits=9) + print(report_predict) + print("耗时:" + str(time.time() - time_start)) + + +def pred_input(path_hyper_parameter=path_hyper_parameters): + # 输入预测 + # 加载超参数 + hyper_parameters = load_json(path_hyper_parameter) + pt = PreprocessText(path_model_dir) + # 模式初始化和加载 + graph = 
Graph(hyper_parameters) + graph.load_model() + ra_ed = graph.word_embedding + ques = '我要打王者荣耀' + # str to token + ques_embed = ra_ed.sentence2idx(ques) + if hyper_parameters['embedding_type'] in ['bert', 'albert']: + x_val_1 = np.array([ques_embed[0]]) + x_val_2 = np.array([ques_embed[1]]) + x_val = [x_val_1, x_val_2] + else: + x_val = ques_embed + # 预测 + pred = graph.predict(x_val) + # 取id to label and pred + pre = pt.prereocess_idx(pred[0]) + print(pre) + while True: + print("请输入: ") + ques = input() + ques_embed = ra_ed.sentence2idx(ques) + print(ques_embed) + if hyper_parameters['embedding_type'] in ['bert', 'albert']: + x_val_1 = np.array([ques_embed[0]]) + x_val_2 = np.array([ques_embed[1]]) + x_val = [x_val_1, x_val_2] + else: + x_val = ques_embed + pred = graph.predict(x_val) + pre = pt.prereocess_idx(pred[0]) + print(pre) + + +if __name__=="__main__": + # 测试集预测 + pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少 + + # 可输入 input 预测 + pred_input() diff --git a/textCNN/train.py b/textCNN/train.py new file mode 100644 index 0000000..480a2ea --- /dev/null +++ b/textCNN/train.py @@ -0,0 +1,89 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:51 +# @author :Mo +# @function :train of TextCNN with baidu-qa-2019 in question title + + +# 适配linux +import pathlib +import sys +import os +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) +# 地址 +from keras_textclassification.conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters +# 训练验证数据地址 +from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid +# 数据预处理, 删除文件目录下文件 +from keras_textclassification.data_preprocess.text_preprocess import PreprocessText, delete_file +# 模型图 +from keras_textclassification.m02_TextCNN.graph import TextCNNGraph as Graph +# 计算时间 +import time + + +def train(hyper_parameters=None, rate=1.0): + if not 
hyper_parameters: + hyper_parameters = { + 'len_max': 50, # 句子最大长度, 固定推荐20-50, bert越长会越慢, 占用空间也会变大, 本地win10-4G设为20就好, 过大小心OOM + 'embed_size': 300, # 字/词向量维度, bert取768, word取300, char可以更小些 + 'vocab_size': 20000, # 这里随便填的,会根据代码里修改 + 'trainable': True, # embedding是静态的还是动态的, 即控制可不可以微调 + 'level_type': 'char', # 级别, 最小单元, 字/词, 填 'char' or 'word', 注意:word2vec模式下训练语料要首先切好 + 'embedding_type': 'random', # 级别, 嵌入类型, 还可以填'xlnet'、'random'、 'bert'、 'albert' or 'word2vec" + 'gpu_memory_fraction': 0.76, #gpu使用率 + 'model': {'label': 17, # 类别数 + 'batch_size': 256, # 批处理尺寸, 感觉原则上越大越好,尤其是样本不均衡的时候, batch_size设置影响比较大 + 'dropout': 0.5, # 随机失活, 概率 + 'decay_step': 100, # 学习率衰减step, 每N个step衰减一次 + 'decay_rate': 0.9, # 学习率衰减系数, 乘法 + 'epochs': 20, # 训练最大轮次 + 'patience': 3, # 早停,2-3就好 + 'lr': 5e-3, # 学习率,bert取5e-5,其他取1e-3, 对训练会有比较大的影响, 如果准确率一直上不去,可以考虑调这个参数 + 'l2': 1e-9, # l2正则化 + 'activate_classify': 'softmax', # 最后一个layer, 即分类激活函数 + 'loss': 'categorical_crossentropy', # 损失函数 + 'metrics': 'accuracy', # 保存更好模型的评价标准 + 'is_training': True, # 训练后者是测试模型 + 'path_model_dir': path_model_dir, # 模型目录 + 'model_path': path_model, + # 模型地址, loss降低则保存的依据, save_best_only=True, save_weights_only=True + 'path_hyper_parameters': path_hyper_parameters, # 模型(包括embedding),超参数地址, + 'path_fineture': path_fineture, # 保存embedding trainable地址, 例如字向量、词向量、bert向量等 + }, + 'embedding': {'layer_indexes': [1, 2, 3, 12, 13], # bert取的层数,1为embedding层,未处理 + # 'corpus_path': 'Y:/BaiduNetdiskDownload/DataSet/bert-model/chinese_bert_chinese_wwm_L-12_H-768_A-12', # embedding预训练数据地址,不配则会默认取conf里边默认的地址 + # 'corpus_path':'Y:/BaiduNetdiskDownload/DataSet/bert-model/baidu_ernie', + # keras - bert可以加载谷歌版bert, 百度版ernie(需转换,https: // github.com / ArthurRizar / tensorflow_ernie), 哈工大版bert - wwm(tf框架,https: // github.com / ymcui / Chinese - BERT - wwm) + }, + 'data':{'train_data': path_baidu_qa_2019_train, # 训练数据 + 'val_data': path_baidu_qa_2019_valid # 验证数据 + }, + } + + # 删除先前存在的模型和embedding微调模型等 + delete_file(path_model_dir) + time_start = 
time.time() + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + ra_ed = graph.word_embedding + # 数据预处理 + pt = PreprocessText(path_model_dir) + x_train, y_train = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['train_data'], + ra_ed, rate=rate, shuffle=True) + x_val, y_val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['val_data'], + ra_ed, rate=rate, shuffle=True) + print("data propress ok!") + print(len(y_train)) + # 训练 graph.fit(x_train, y_train, x_val, y_val) + print("耗时:" + str(time.time()-time_start)) + + +if __name__=="__main__": + train(rate=1) + + From e8eb0e1ee2c37c010fc600f8c319fdc6301b2570 Mon Sep 17 00:00:00 2001 From: atom-zh Date: Sun, 20 Jun 2021 18:04:14 +0800 Subject: [PATCH 2/5] upload category to labels file --- data/category2labels.json | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 data/category2labels.json diff --git a/data/category2labels.json b/data/category2labels.json new file mode 100644 index 0000000..e79fc90 --- /dev/null +++ b/data/category2labels.json @@ -0,0 +1,73 @@ +{ + "A": [ + "主题活动", + "党性学习", + "十九大", + "廉政教育", + "政策实践", + "相关会议", + "重要讲话" + ], + "B": [ + "听取工作汇报", + "换届选举", + "提升基层工作", + "科普人才队伍建设", + "群团工作会议" + ], + "C": [ + "助推新农村文化建设", + "城区精神文明共建", + "实践科技与科普服务平台", + "少数民族村的保护与发展", + "展览献爱心", + "文明单位考评", + "道德讲堂" + ], + "D":[ + "优秀人才评选", + "先进示范", + "双亮双比活动", + "授予荣誉称号", + "自制教具评选", + "表彰", + "评审" + ], + "E":[ + "三长制", + "创业青年座谈会", + "征求意见和建议", + "最美科技工作者", + "科技服务", + "科技者活动日", + "竞赛" + ], + "F":[ + "创新", + "助农", + "发展战略", + "对接发展", + "科技服务平台", + "科技节", + "经济发展新模式" + ], + "G":[ + "三下乡", + "科学素质小组会议", + "科技培训", + "科普服务", + "科普活动", + "科学普及" + ], + "H":[ + "座谈会", + "服务农业", + "服务群众", + "社区共建", + "走访调研调查", + "百汇联百村" + ], + "I":[ + "学会活动" + ] +} From 16daf103963bdeb31262bf6c812cd4b0671ccc4e Mon Sep 17 00:00:00 2001 From: atom-zh Date: Sat, 14 Aug 2021 00:58:39 
+0800 Subject: [PATCH 3/5] modify gitigore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6c18571..8946786 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,5 @@ venv.bak/ /site # mypy -.mypy_cache/ +out/ + From 51a96fbf01b043179955924cf7b73c4764478761 Mon Sep 17 00:00:00 2001 From: atom-zh Date: Sat, 14 Aug 2021 01:01:30 +0800 Subject: [PATCH 4/5] upload the main code --- base/__init__.py | 5 + base/embedding.py | 220 +++++++ base/graph.py | 286 +++++++++ {config => conf}/__init__.py | 0 {config => conf}/logger_config.py | 2 +- conf/path_config.py | 52 ++ config/path_config.py | 54 -- data/01-anhui.xlsx | Bin 50784 -> 0 bytes data/category2labels.json | 73 --- data/labels.csv | 84 --- data/train.csv | 343 ----------- data/valid.csv | 63 -- data_preprocess/data_excel2csv.py | 247 ++++++-- data_preprocess/data_split.py | 9 +- data_preprocess/generator_preprocess.py | 10 +- data_preprocess/text_preprocess.py | 449 +------------- data_preprocess/utils.py | 61 ++ keras_layers/__init__.py | 5 + keras_layers/albert/__init__.py | 5 + keras_layers/albert/albert.py | 331 ++++++++++ keras_layers/attention_dot.py | 104 ++++ keras_layers/attention_self.py | 51 ++ keras_layers/capsule.py | 287 +++++++++ keras_layers/highway.py | 61 ++ keras_layers/k_max_pooling.py | 36 ++ keras_layers/keras_lookahead.py | 77 +++ keras_layers/keras_radam.py | 96 +++ keras_layers/non_mask_layer.py | 32 + keras_layers/transformer.py | 577 ++++++++++++++++++ keras_layers/transformer_utils/__init__.py | 5 + keras_layers/transformer_utils/embedding.py | 91 +++ keras_layers/transformer_utils/feedforward.py | 122 ++++ .../transformer_utils/layer_normalization.py | 107 ++++ .../transformer_utils/multi_head_attention.py | 225 +++++++ keras_layers/transformer_utils/readme.md | 7 + .../scale_dot_product_attention.py | 82 +++ .../triangle_position_embedding.py | 116 ++++ {textCNN => mLSTM}/__init__.py | 0 mLSTM/graph.py | 221 +++++++ 
mLSTM/train.py | 179 ++++++ mTextCNN/__init__.py | 5 + {textCNN => mTextCNN}/graph.py | 23 +- mTextCNN/train.py | 183 ++++++ mTextRCNN/__init__.py | 5 + mTextRCNN/graph.py | 172 ++++++ mTextRCNN/train.py | 177 ++++++ textCNN/predict.py | 130 ---- textCNN/train.py | 89 --- 48 files changed, 4215 insertions(+), 1344 deletions(-) create mode 100644 base/__init__.py create mode 100644 base/embedding.py create mode 100644 base/graph.py rename {config => conf}/__init__.py (100%) rename {config => conf}/logger_config.py (95%) create mode 100644 conf/path_config.py delete mode 100644 config/path_config.py delete mode 100644 data/01-anhui.xlsx delete mode 100644 data/category2labels.json delete mode 100644 data/labels.csv delete mode 100644 data/train.csv delete mode 100644 data/valid.csv create mode 100644 data_preprocess/utils.py create mode 100644 keras_layers/__init__.py create mode 100644 keras_layers/albert/__init__.py create mode 100644 keras_layers/albert/albert.py create mode 100644 keras_layers/attention_dot.py create mode 100644 keras_layers/attention_self.py create mode 100644 keras_layers/capsule.py create mode 100644 keras_layers/highway.py create mode 100644 keras_layers/k_max_pooling.py create mode 100644 keras_layers/keras_lookahead.py create mode 100644 keras_layers/keras_radam.py create mode 100644 keras_layers/non_mask_layer.py create mode 100644 keras_layers/transformer.py create mode 100644 keras_layers/transformer_utils/__init__.py create mode 100644 keras_layers/transformer_utils/embedding.py create mode 100644 keras_layers/transformer_utils/feedforward.py create mode 100644 keras_layers/transformer_utils/layer_normalization.py create mode 100644 keras_layers/transformer_utils/multi_head_attention.py create mode 100644 keras_layers/transformer_utils/readme.md create mode 100644 keras_layers/transformer_utils/scale_dot_product_attention.py create mode 100644 keras_layers/transformer_utils/triangle_position_embedding.py rename {textCNN => 
mLSTM}/__init__.py (100%) create mode 100644 mLSTM/graph.py create mode 100644 mLSTM/train.py create mode 100644 mTextCNN/__init__.py rename {textCNN => mTextCNN}/graph.py (89%) create mode 100644 mTextCNN/train.py create mode 100644 mTextRCNN/__init__.py create mode 100644 mTextRCNN/graph.py create mode 100644 mTextRCNN/train.py delete mode 100644 textCNN/predict.py delete mode 100644 textCNN/train.py diff --git a/base/__init__.py b/base/__init__.py new file mode 100644 index 0000000..abce1c9 --- /dev/null +++ b/base/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 11:24 +# @author :Mo +# @function : \ No newline at end of file diff --git a/base/embedding.py b/base/embedding.py new file mode 100644 index 0000000..e23a0f6 --- /dev/null +++ b/base/embedding.py @@ -0,0 +1,220 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 11:29 +# @author :Mo +# @function :embeddings of model, base embedding of random, word2vec or bert + +from conf.path_config import path_embedding_vector_word2vec_char, path_embedding_vector_word2vec_word +from conf.path_config import path_embedding_random_char, path_embedding_random_word +from data_preprocess.text_preprocess import get_ngram +from keras.layers import Add, Embedding, Lambda +from gensim.models import KeyedVectors +from keras.models import Input, Model +import numpy as np +import jieba +import os + +class BaseEmbedding: + def __init__(self, hyper_parameters): + self.len_max = hyper_parameters.get('len_max', 50) # 文本最大长度, 建议25-50 + self.embed_size = hyper_parameters.get('embed_size', 300) # 嵌入层尺寸 + self.vocab_size = hyper_parameters.get('vocab_size', 30000) # 字典大小, 这里随便填的,会根据代码里修改 + self.trainable = hyper_parameters.get('trainable', False) # 是否微调, 例如静态词向量、动态词向量、微调bert层等, random也可以 + self.level_type = hyper_parameters.get('level_type', 'char') # 还可以填'word' + self.embedding_type = hyper_parameters.get('embedding_type', 'word2vec') # 
词嵌入方式,可以选择'xlnet'、'bert'、'random'、'word2vec' + + # 自适应, 根据level_type和embedding_type判断corpus_path + if self.level_type == "word": + if self.embedding_type == "random": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_random_word) + elif self.embedding_type == "word2vec": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_vector_word2vec_word) + elif self.embedding_type == "bert": + raise RuntimeError("bert level_type is 'char', not 'word'") + elif self.embedding_type == "xlnet": + raise RuntimeError("xlnet level_type is 'char', not 'word'") + elif self.embedding_type == "albert": + raise RuntimeError("albert level_type is 'char', not 'word'") + else: + raise RuntimeError("embedding_type must be 'random', 'word2vec' or 'bert'") + elif self.level_type == "char": + if self.embedding_type == "random": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_random_char) + elif self.embedding_type == "word2vec": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_vector_word2vec_char) + elif self.embedding_type == "bert": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_bert) + elif self.embedding_type == "xlnet": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_xlnet) + elif self.embedding_type == "albert": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path', path_embedding_albert) + else: + raise RuntimeError("embedding_type must be 'random', 'word2vec' or 'bert'") + elif self.level_type == "ngram": + if self.embedding_type == "random": + self.corpus_path = hyper_parameters['embedding'].get('corpus_path') + if not self.corpus_path: + raise RuntimeError("corpus_path must exists!") + else: + raise RuntimeError("embedding_type must be 'random', 'word2vec' or 'bert'") + else: + raise RuntimeError("level_type must be 'char' or 'word'") + # 定义的符号 + 
self.ot_dict = {'[PAD]': 0, + '[UNK]': 1, + '[BOS]': 2, + '[EOS]': 3, } + self.deal_corpus() + self.build() + + def deal_corpus(self): # 处理语料 + pass + + def build(self): + self.token2idx = {} + self.idx2token = {} + + def sentence2idx(self, text, second_text=None): + if second_text: + second_text = "[SEP]" + str(second_text).upper() + # text = extract_chinese(str(text).upper()) + text = str(text).upper() + + if self.level_type == 'char': + text = list(text) + elif self.level_type == 'word': + text = list(jieba.cut(text, cut_all=False, HMM=True)) + else: + raise RuntimeError("your input level_type is wrong, it must be 'word' or 'char'") + text = [text_one for text_one in text] + len_leave = self.len_max - len(text) + if len_leave >= 0: + text_index = [self.token2idx[text_char] if text_char in self.token2idx else self.token2idx['[UNK]'] for + text_char in text] + [self.token2idx['[PAD]'] for i in range(len_leave)] + else: + text_index = [self.token2idx[text_char] if text_char in self.token2idx else self.token2idx['[UNK]'] for + text_char in text[0:self.len_max]] + return text_index + + def idx2sentence(self, idx): + assert type(idx) == list + text_idx = [self.idx2token[id] if id in self.idx2token else self.idx2token['[UNK]'] for id in idx] + return "".join(text_idx) + + +class RandomEmbedding(BaseEmbedding): + def __init__(self, hyper_parameters): + self.ngram_ns = hyper_parameters['embedding'].get('ngram_ns', [1, 2, 3]) # ngram信息, 根据预料获取 + # self.path = hyper_parameters.get('corpus_path', path_embedding_random_char) + super().__init__(hyper_parameters) + + def deal_corpus(self): + token2idx = self.ot_dict.copy() + count = 3 + if 'term' in self.corpus_path: + with open(file=self.corpus_path, mode='r', encoding='utf-8') as fd: + while True: + term_one = fd.readline() + if not term_one: + break + term_one = term_one.strip() + if term_one not in token2idx: + count = count + 1 + token2idx[term_one] = count + + elif os.path.exists(self.corpus_path): + with 
open(file=self.corpus_path, mode='r', encoding='utf-8') as fd: + terms = fd.readlines() + for term_one in terms: + if self.level_type == 'char': + text = list(term_one.replace(' ', '').strip()) + elif self.level_type == 'word': + text = list(jieba.cut(term_one, cut_all=False, HMM=False)) + elif self.level_type == 'ngram': + text = get_ngram(term_one, ns=self.ngram_ns) + else: + raise RuntimeError("your input level_type is wrong, it must be 'word', 'char', 'ngram'") + for text_one in text: + if text_one not in token2idx: + count = count + 1 + token2idx[text_one] = count + else: + raise RuntimeError("your input corpus_path is wrong, it must be 'dict' or 'corpus'") + self.token2idx = token2idx + self.idx2token = {} + for key, value in self.token2idx.items(): + self.idx2token[value] = key + + def build(self, **kwargs): + self.vocab_size = len(self.token2idx) + self.input = Input(shape=(self.len_max,), dtype='int32') + self.output = Embedding(self.vocab_size+1, + self.embed_size, + input_length=self.len_max, + trainable=self.trainable, + )(self.input) + self.model = Model(self.input, self.output) + + def sentence2idx(self, text, second_text=""): + if second_text: + second_text = "[SEP]" + str(second_text).upper() + # text = extract_chinese(str(text).upper()+second_text) + text =str(text).upper() + second_text + if self.level_type == 'char': + text = list(text) + elif self.level_type == 'word': + text = list(jieba.cut(text, cut_all=False, HMM=False)) + elif self.level_type == 'ngram': + text = get_ngram(text, ns=self.ngram_ns) + else: + raise RuntimeError("your input level_type is wrong, it must be 'word' or 'char'") + # text = [text_one for text_one in text] + len_leave = self.len_max - len(text) + if len_leave >= 0: + text_index = [self.token2idx[text_char] if text_char in self.token2idx else self.token2idx['[UNK]'] for + text_char in text] + [self.token2idx['[PAD]'] for i in range(len_leave)] + else: + text_index = [self.token2idx[text_char] if text_char in 
self.token2idx else self.token2idx['[UNK]'] for + text_char in text[0:self.len_max]] + return text_index + + +class WordEmbedding(BaseEmbedding): + def __init__(self, hyper_parameters): + # self.path = hyper_parameters.get('corpus_path', path_embedding_vector_word2vec) + super().__init__(hyper_parameters) + + def build(self, **kwargs): + self.embedding_type = 'word2vec' + print("load word2vec start!") + self.key_vector = KeyedVectors.load_word2vec_format(self.corpus_path, **kwargs) + print("load word2vec end!") + self.embed_size = self.key_vector.vector_size + + self.token2idx = self.ot_dict.copy() + embedding_matrix = [] + # 首先加self.token2idx中的四个[PAD]、[UNK]、[BOS]、[EOS] + embedding_matrix.append(np.zeros(self.embed_size)) + embedding_matrix.append(np.random.uniform(-0.5, 0.5, self.embed_size)) + embedding_matrix.append(np.random.uniform(-0.5, 0.5, self.embed_size)) + embedding_matrix.append(np.random.uniform(-0.5, 0.5, self.embed_size)) + + for word in self.key_vector.index2entity: + self.token2idx[word] = len(self.token2idx) + embedding_matrix.append(self.key_vector[word]) + + # self.token2idx = self.token2idx + self.idx2token = {} + for key, value in self.token2idx.items(): + self.idx2token[value] = key + + self.vocab_size = len(self.token2idx) + embedding_matrix = np.array(embedding_matrix) + self.input = Input(shape=(self.len_max,), dtype='int32') + + self.output = Embedding(self.vocab_size, + self.embed_size, + input_length=self.len_max, + weights=[embedding_matrix], + trainable=self.trainable)(self.input) + self.model = Model(self.input, self.output) diff --git a/base/graph.py b/base/graph.py new file mode 100644 index 0000000..4981135 --- /dev/null +++ b/base/graph.py @@ -0,0 +1,286 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:51 +# @author :Mo +# @function :graph of base + + +from conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters, path_out +from data_preprocess.generator_preprocess import 
PreprocessGenerator, PreprocessSimGenerator +from data_preprocess.text_preprocess import save_json +from keras_layers.keras_lookahead import Lookahead +from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard +from keras_layers.keras_radam import RAdam +from keras.optimizers import Adam +from keras import backend as K +from keras.models import Model +from keras.utils import plot_model + +import numpy as np +import os + + +class graph: + def __init__(self, hyper_parameters): + """ + 模型初始化 + :param hyper_parameters:json, json['model'] and json['embedding'] + """ + self.len_max = hyper_parameters.get('len_max', 50) # 文本最大长度 + self.embed_size = hyper_parameters.get('embed_size', 300) # 嵌入层尺寸 + self.trainable = hyper_parameters.get('trainable', False) # 是否微调, 例如静态词向量、动态词向量、微调bert层等, random也可以 + self.embedding_type = hyper_parameters.get('embedding_type', 'word2vec') # 词嵌入方式,可以选择'xlnet'、'bert'、'gpt-2'、'word2vec'或者'None' + self.gpu_memory_fraction = hyper_parameters.get('gpu_memory_fraction', None) # gpu使用率, 默认不配置 + self.hyper_parameters = hyper_parameters + hyper_parameters_model = hyper_parameters['model'] + self.label = hyper_parameters_model.get('label', 2) # 类型 + self.batch_size = hyper_parameters_model.get('batch_size', 32) # 批向量 + self.filters = hyper_parameters_model.get('filters', [2, 3, 4]) # 卷积核大小 + self.filters_num = hyper_parameters_model.get('filters_num', 300) # 核数 + self.channel_size = hyper_parameters_model.get('channel_size', 1) # 通道数 + self.dropout = hyper_parameters_model.get('dropout', 0.5) # dropout层系数,舍弃 + self.decay_step = hyper_parameters_model.get('decay_step', 100) # 衰减步数 + self.decay_rate = hyper_parameters_model.get('decay_rate', 0.9) # 衰减系数 + self.epochs = hyper_parameters_model.get('epochs', 20) # 训练轮次 + self.vocab_size = hyper_parameters_model.get('vocab_size', 20000) # 字典词典大小 + self.lr = hyper_parameters_model.get('lr', 1e-3) # 学习率 + self.l2 = hyper_parameters_model.get('l2', 1e-6) # l2正则化系数 + self.activate_classify = 
hyper_parameters_model.get('activate_classify', 'softmax') # 分类激活函数,softmax或者signod + self.loss = hyper_parameters_model.get('loss', 'categorical_crossentropy') # 损失函数, mse, categorical_crossentropy, sparse_categorical_crossentropy, binary_crossentropy等 + self.metrics = hyper_parameters_model.get('metrics', 'accuracy') # acc, binary_accuracy, categorical_accuracy, sparse_categorical_accuracy, sparse_top_k_categorical_accuracy + self.is_training = hyper_parameters_model.get('is_training', False) # 是否训练, 保存时候为Flase,方便预测 + self.path_model_dir = hyper_parameters_model.get('path_model_dir', path_model_dir) # 模型目录地址 + self.model_path = hyper_parameters_model.get('model_path', path_model) # 模型地址 + self.path_hyper_parameters = hyper_parameters_model.get('path_hyper_parameters', path_hyper_parameters) # 超参数保存地址 + self.path_fineture = hyper_parameters_model.get('path_fineture', path_fineture) # embedding层保存地址, 例如静态词向量、动态词向量、微调bert层等 + self.patience = hyper_parameters_model.get('patience', 3) # 早停, 2-3就可以了 + self.optimizer_name = hyper_parameters_model.get('optimizer_name', 'Adam') # 早停, 2-3就可以了 + if self.gpu_memory_fraction: + # keras, tensorflow控制GPU使用率等 + import tensorflow as tf + config = tf.ConfigProto() + # config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction + config.gpu_options.allow_growth = True + sess = tf.Session(config=config) + K.set_session(sess) + self.create_model(hyper_parameters) + if self.is_training: # 是否是训练阶段, 与预测区分开 + self.create_compile() + + + def create_model(self, hyper_parameters): + """ + 构建神经网络 + :param hyper_parameters: json,超参数 + :return: + """ + # embeddings选择 + Embeddings = None + if self.embedding_type == 'random': + from base.embedding import RandomEmbedding as Embeddings + elif self.embedding_type == 'bert': + from base.embedding import BertEmbedding as Embeddings + elif self.embedding_type == 'xlnet': + from base.embedding import XlnetEmbedding as Embeddings + elif self.embedding_type == 'albert': + from 
base.embedding import AlbertEmbedding as Embeddings + elif self.embedding_type == 'word2vec': + from base.embedding import WordEmbedding as Embeddings + else: + raise RuntimeError("your input embedding_type is wrong, it must be 'xlnet'、'random'、 'bert'、 'albert' or 'word2vec") + # 构建网络层 + self.word_embedding = Embeddings(hyper_parameters=hyper_parameters) + if os.path.exists(self.path_fineture) and self.trainable: + self.word_embedding.model.load_weights(self.path_fineture) + print("load path_fineture ok!") + self.model = Model + + def callback(self): + """ + 评价函数、早停 + :return: + """ + cb_em = [ TensorBoard(log_dir=os.path.join(self.path_model_dir, "logs"), batch_size=self.batch_size, update_freq='batch'), + EarlyStopping(monitor='val_loss', mode='min', min_delta=1e-8, patience=self.patience), + ModelCheckpoint(monitor='val_loss', mode='min', filepath=self.model_path, verbose=1, + save_best_only=True, save_weights_only=True),] + return cb_em + + def create_compile(self): + """ + 构建优化器、损失函数和评价函数 + :return: + """ + + if self.optimizer_name.upper() == "ADAM": + self.model.compile(optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), + loss= self.loss, + metrics=[self.metrics]) # Any optimize + elif self.optimizer_name.upper() == "RADAM": + self.model.compile(optimizer=RAdam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), + loss=self.loss, + metrics=[self.metrics]) # Any optimize + else: + self.model.compile(optimizer=RAdam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), + loss= self.loss, + metrics=[self.metrics]) # Any optimize + lookahead = Lookahead(k=5, alpha=0.5) # Initialize Lookahead + lookahead.inject(self.model) # add into model + + def fit(self, x_train, y_train, x_dev, y_dev): + """ + 训练 + :param x_train: + :param y_train: + :param x_dev: + :param y_dev: + :return: + """ + # 保存超参数 + self.hyper_parameters['model']['is_training'] = False # 预测时候这些设为False + self.hyper_parameters['model']['trainable'] = False + 
self.hyper_parameters['model']['dropout'] = 0.0 + + save_json(jsons=self.hyper_parameters, json_path=self.path_hyper_parameters) + # if self.is_training and os.path.exists(self.model_path): + # print("load_weights") + # self.model.load_weights(self.model_path) + # 训练模型 + ret = self.model.fit(x_train, y_train, batch_size=self.batch_size, + epochs=self.epochs, validation_data=(x_dev, y_dev), + shuffle=True, + callbacks=self.callback()) + # 保存embedding, 动态的 + if self.trainable: + self.word_embedding.model.save(self.path_fineture) + # 保存模型架构图 + model_path = path_out + self.hyper_parameters['train_name'] + '_' + self.hyper_parameters['train_mode'] + '/model.png' + plot_model(self.model, to_file= model_path, show_shapes=True) + return ret + + def fit_generator(self, embed, rate=1): + """ + + :param data_fit_generator: yield, 训练数据 + :param data_dev_generator: yield, 验证数据 + :param steps_per_epoch: int, 训练一轮步数 + :param validation_steps: int, 验证一轮步数 + :return: + """ + # 保存超参数 + self.hyper_parameters['model']['is_training'] = False # 预测时候这些设为False + self.hyper_parameters['model']['trainable'] = False + self.hyper_parameters['model']['dropout'] = 0.0 + + save_json(jsons=self.hyper_parameters, json_path=self.path_hyper_parameters) + + pg = PreprocessGenerator(self.path_model_dir) + _, len_train = pg.preprocess_get_label_set(self.hyper_parameters['data']['train_data']) + data_fit_generator = pg.preprocess_label_ques_to_idx(embedding_type=self.hyper_parameters['embedding_type'], + batch_size=self.batch_size, + path=self.hyper_parameters['data']['train_data'], + epcoh=self.epochs, + embed=embed, + rate=rate) + _, len_val = pg.preprocess_get_label_set(self.hyper_parameters['data']['val_data']) + data_dev_generator = pg.preprocess_label_ques_to_idx(embedding_type=self.hyper_parameters['embedding_type'], + batch_size=self.batch_size, + path=self.hyper_parameters['data']['val_data'], + epcoh=self.epochs, + embed=embed, + rate=rate) + steps_per_epoch = len_train // self.batch_size + 1 
+ validation_steps = len_val // self.batch_size + 1 + # 训练模型 + self.model.fit_generator(generator=data_fit_generator, + validation_data=data_dev_generator, + callbacks=self.callback(), + epochs=self.epochs, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps) + # 保存embedding, 动态的 + if self.trainable: + self.word_embedding.model.save(self.path_fineture) + + + def fit_generator_sim(self, embed, rate=1): + """ + + :param data_fit_generator: yield, 训练数据 + :param data_dev_generator: yield, 验证数据 + :param steps_per_epoch: int, 训练一轮步数 + :param validation_steps: int, 验证一轮步数 + :return: + """ + # 保存超参数 + self.hyper_parameters['model']['is_training'] = False # 预测时候这些设为False + self.hyper_parameters['model']['trainable'] = False + self.hyper_parameters['model']['dropout'] = 0.0 + + save_json(jsons=self.hyper_parameters, json_path=self.path_hyper_parameters) + + pg = PreprocessSimGenerator(self.hyper_parameters['model']['path_model_dir']) + _, len_train = pg.preprocess_get_label_set(self.hyper_parameters['data']['train_data']) + data_fit_generator = pg.preprocess_label_ques_to_idx(embedding_type=self.hyper_parameters['embedding_type'], + batch_size=self.batch_size, + path=self.hyper_parameters['data']['train_data'], + embed=embed, + epcoh=self.epochs, + rate=rate) + _, len_val = pg.preprocess_get_label_set(self.hyper_parameters['data']['val_data']) + data_dev_generator = pg.preprocess_label_ques_to_idx(embedding_type=self.hyper_parameters['embedding_type'], + batch_size=self.batch_size, + path=self.hyper_parameters['data']['val_data'], + embed=embed, + epcoh=self.epochs, + rate=rate) + steps_per_epoch = len_train // self.batch_size + 1 + validation_steps = len_val // self.batch_size + 1 + # self.model.load_weights(self.model_path) + # 训练模型 + self.model.fit_generator(generator=data_fit_generator, + validation_data=data_dev_generator, + callbacks=self.callback(), + epochs=self.epochs, + steps_per_epoch=32, + validation_steps=6) + # 保存embedding, 动态的 + if 
self.trainable: + self.word_embedding.model.save(self.path_fineture) + # 1600000/6=266666 + # 300000/6=50000 + + # 36000/6000 + def load_model(self): + """ + 模型下载 + :return: + """ + print("load_model start!") + self.model.load_weights(self.model_path) + print("load_model end!") + + def predict(self, sen): + """ + 预测 + :param sen: + :return: + """ + if self.embedding_type in ['bert', 'xlnet', 'albert']: + if type(sen) == np.ndarray: + sen = sen.tolist() + elif type(sen) == list: + sen = sen + else: + raise RuntimeError("your input sen is wrong, it must be type of list or np.array") + return self.model.predict(sen) + else: + if type(sen)==np.ndarray: + sen = sen + elif type(sen)==list: + sen = np.array([sen]) + else: + raise RuntimeError("your input sen is wrong, it must be type of list or np.array") + return self.model.predict(sen) + + diff --git a/config/__init__.py b/conf/__init__.py similarity index 100% rename from config/__init__.py rename to conf/__init__.py diff --git a/config/logger_config.py b/conf/logger_config.py similarity index 95% rename from config/logger_config.py rename to conf/logger_config.py index 02d0d4a..fba0e01 100644 --- a/config/logger_config.py +++ b/conf/logger_config.py @@ -5,7 +5,7 @@ # @function :logger -from keras_textclassification.conf.path_config import path_root +from conf.path_config import path_root from logging.handlers import RotatingFileHandler import logging import time diff --git a/conf/path_config.py b/conf/path_config.py new file mode 100644 index 0000000..fde5fee --- /dev/null +++ b/conf/path_config.py @@ -0,0 +1,52 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/5 21:04 +# @author :Mo +# @function :file of path + +import os + +# 项目的根目录 +path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +path_root = path_root.replace('\\', '/') + +# train out +path_out = path_root + "/out/" + +# path of embedding +path_embedding = path_out + 'data/embeddings' +path_embedding_user_dict = 
path_embedding + '/user_dict.txt' +path_embedding_random_char = path_embedding + '/term_char.txt' +path_embedding_random_word = path_embedding + '/term_word.txt' +path_embedding_vector_word2vec_char = path_embedding + '/multi_label_char.vec' +path_embedding_vector_word2vec_word = path_embedding + '/multi_label_word.vec' +path_embedding_vector_word2vec_char_bin = path_embedding + '/multi_label_char.bin' +path_embedding_vector_word2vec_word_bin = path_embedding + '/multi_label_word.bin' + +path_dataset = path_root +'/dataset' +path_category = path_dataset + '/category2labels.json' +path_l2i_i2l = path_dataset + '/l2i_i2l.json' + +# classfiy multi labels 2021 +path_multi_label = path_out + 'data/multi_label' +path_multi_label_train = path_multi_label + '/train.csv' +path_multi_label_valid = path_multi_label + '/valid.csv' +path_multi_label_labels = path_multi_label + '/labels.csv' +path_multi_label_tests = path_multi_label + '/tests.csv' +path_multi_label_error = path_multi_label + '/error.csv' + +# 路径抽象层 +path_label = path_multi_label_labels +path_train = path_multi_label_train +path_valid = path_multi_label_valid +path_tests = path_multi_label_tests +path_edata = path_multi_label_error + +# 模型目录 +path_model_dir = path_out + "data/model" +# 语料地址 +path_model = path_model_dir + '/model_fast_text.h5' +# 超参数保存地址 +path_hyper_parameters = path_model_dir + '/hyper_parameters.json' +# embedding微调保存地址 +path_fineture = path_model_dir + "/embedding_trainable.h5" diff --git a/config/path_config.py b/config/path_config.py deleted file mode 100644 index 0d078f1..0000000 --- a/config/path_config.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: UTF-8 -*- -# !/usr/bin/python -# @time :2019/6/5 21:04 -# @author :Mo -# @function :file of path - -import os - -# 项目的根目录 -path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) -path_root = path_root.replace('\\', '/') - -# path of embedding -path_embedding_random_char = path_root + '/data/embeddings/term_char.txt' 
-path_embedding_random_word = path_root + '/data/embeddings/term_word.txt' -path_embedding_bert = path_root + '/data/embeddings/chinese_L-12_H-768_A-12/' -path_embedding_xlnet = path_root + '/data/embeddings/chinese_xlnet_mid_L-24_H-768_A-12/' -path_embedding_albert = path_root + '/data/embeddings/albert_base_zh' -path_embedding_vector_word2vec_char = path_root + '/data/embeddings/w2v_model_wiki_char.vec' -path_embedding_vector_word2vec_word = path_root + '/data/embeddings/w2v_model_merge_short.vec' - -# classify data of baidu qa 2019 -path_baidu_qa_2019_train = path_root + '/data/baidu_qa_2019/baike_qa_train.csv' -path_baidu_qa_2019_valid = path_root + '/data/baidu_qa_2019/baike_qa_valid.csv' - -# 今日头条新闻多标签分类 -path_byte_multi_news_train = path_root + '/data/byte_multi_news/train.csv' -path_byte_multi_news_valid = path_root + '/data/byte_multi_news/valid.csv' -path_byte_multi_news_label = path_root + '/data/byte_multi_news/labels.csv' - -# classify data of baidu qa 2019 -path_sim_webank_train = path_root + '/data/sim_webank/train.csv' -path_sim_webank_valid = path_root + '/data/sim_webank/valid.csv' -path_sim_webank_test = path_root + '/data/sim_webank/test.csv' - -# classfiy multi labels 2021 -path_multi_label_train = path_root + '/data/multi_label/train.csv' -path_multi_label_valid = path_root + '/data/multi_label/valid.csv' -path_multi_label_labels = path_root + '/data/multi_label/labels.csv' - -# 路径抽象层 -path_train = path_multi_label_train -path_valid = path_multi_label_valid -path_label = path_multi_label_labels - -# fast_text config -# 模型目录 -path_model_dir = path_root + "/data/model/fast_text/" -# 语料地址 -path_model = path_root + '/data/model/fast_text/model_fast_text.h5' -# 超参数保存地址 -path_hyper_parameters = path_root + '/data/model/fast_text/hyper_parameters.json' -# embedding微调保存地址 -path_fineture = path_root + "/data/model/fast_text/embedding_trainable.h5" diff --git a/data/01-anhui.xlsx b/data/01-anhui.xlsx deleted file mode 100644 index 
29b744be848855311fc875794e6e18e77342f6f7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 50784 zcmZsCbwHKh(k|V?rZ%QI_wt9eX3d)QtXVV9%z9N*L`B0vy8j7NSrNPc{eOP2fj^kro2fb3J2-Qx0b(S; zKVUtGwLG7zf+8a!@uDChf&N>}#KD2n-PYzqJVF5#Mk0L{cnA7y&%;5l!Oa>77sAwY zrRK5!(Wyj~usMFo!RZ1y$Fw`P*Sf~zAs#Pla(Zv{cfI`xE5|+~qi!jvfw?5nQ3e}q zp=hfk(59Om6T^bdqRjg49bJh&mJ8?Unfr1DYV2c_a8l+qm*}lGQRwxdGNF!j$}Vhw zD8n6(+24FrVCS#-U5p%LlVK5t_YA8z*lDdD*ZOT%{rTeLWO)pi0cRs83a;;tK}B+p zdr=b`+1kfEo_a=SrG9HV+_R>Ihq&kRM1*7Pv89y|X~s47Ct{_LF` z|MLU=c;)8eVE2TRbNT7zc13FUmf9aw*fy&A4n|904`KA{uA;ujT^`zUv3^LZp`F5# zUCR8Cd`QI~B-?LSnzmm)zq+tHK>y$9mM3G;CS$pzg=Stze>9Yq5MfB`&7=iW<@|Y5;+tZ^*rf2Mz+!~e@7ylCHWs3 z3u{)Z29mG_Z`6pn0iinbm-Xty7aw1(h90OkU}W_ikm@X@8OJQ+gP$=FQpW|x4!n6+ zPD95ZibG;$2@A)%JrIO`y`39pf1<+ltNnn7dL{caI&IHHTdfmsgDqvfHEs9ilXKgP z^PjPg6@KkTpt5lzSf6(q*hZY~&ZlhSOwgff`QQ2PXO%k0qlwY7xu2sF7~$f=oiI{D|SOk`u74C zA#2&p%9$0^wqe%KK?L|&EvdY}aFjJNTm*ae?E11+5Ro*rcl~=e3tn@F+9yjavhx%l zx}kI+IS!?%Z6Exc#&=0##;+?fzdYC5*k|N0AZvU+_l7r!>h%+jeJopNcLzINW1C*_ z@*fx?-$blOO5bh{ImRl_yGiKCC@H)RV}c#r_$#Vq^J!erm}z za);O#rNZfTVs~RivUV?)q)T!N3nN6^<;FHnpaQtM+dPDn|GxnKKLYRVo!&WH zSy;II-{B`4d+`gI7ZOqi2`Uo!|B-!|0=qw{=(cx>jE(V6FK;q_-HB!Hg=KyHRH8Il zv+nZzrQXT!S)VK)?^O+k@PNTSeGU$|7JV+e&-^Z?supfK?#71hn%b{cHpdok*9jJH z_qVxd{Z9UNh~8fR^}D^iyB77mXjB!wxf=7kJwG@if3wUbdiVQRj;P=5AFlSx-k_)HK~nNc1xSFi}4WgZ?TKF$9s1N z-|tL9@7|8y%%6#R-(7UHwA4z{itf4Ho;G&)Tr_svUa!CRqiDlIqvka7xxQW&^}W5` z?k}+y^}G7}y<>-Azr*+P_xFzL?z28q%1bWa%kzGtwyTzIuG@?CZ3hoOUrQsm*}L1@ z)kR5jEn|y!oBaf| ze&@?C_p(#wO%ca;CyFNoZt+zOAA9aZtIj72HvMjnduc^(59aUAmyg0!MX#sM{LZg8 zjoMn;GHvg^mvr3R^zYprZ;pNYVD{(ieU5K+N26cI`N0+0)L#2JFzs@tW#;nB&F)yv z-4$l!##E=!A%EQIV!#Wdju+XdcOUdPk40Z}T&=XIY;TPs<55%^P2LLOQ9rF}c(-t+ z$@BAW`c~-HrP-(1;Pft?l7Qk1p5*D`945C*Kf2=80>RbZ`O#47Cg1g#+?S)xXD@EX zb~{$bRE_#N4EKmmi^8i>6UsD?A+~O zoI2`qJ|jBwG&X7p5Z)LFs?d zBlCWpgm`ipEs909A=%cwcrtzEIHEa3VvTWYGvA_ z`L!!ga(*yBC~o?q#Dv+Yx=>L#Qjv*=3+409uK?`anVD|~ElO0Qe@>dnzuc0iyqPCS 
z{&Ic-eGIu;_n}aWTkD(Z)K#;8sduPAx5M}_?Vb4E=yTN7vzm(RyUoozTiac?AEGzJ z-j|l=$6Wi{Myn;M9YLzm=_4CMnCH_!Z9A}faITGA=Oqcho=`_)OIj(RF~(m^dzv4h z5N1HfP|eo+pV?s_($fvKk3ukY5W+pkFFkvZCE=*Z@*n)(o|U8r`S?)tkj~4$F$_(Y zoBGW4ejp%#X2oAoWW^uS6X0JsDq4TFGc0ach0z!1N(EQk)uPALnFREt`ab9heQM3; zDag;f(Urk@@&fu^U`cWJr3B8727$UUKV_Et#B@~^0UiVg+(p9Qcl#^UUD++Ww@8Dx z4NFeDlGaAs1Ar1W95uAC-OuW!q7pY|3^Kk|x~UML{F_gD7k@R=0R+E^h7lhuK?MyK zaKLkjqw*H1{($B|7+0@LmV${p8>+~VCIE^X4)FhU^RO>3Af-nshcgei^+wF=q3PG57Ciqmu0{qNEV=Edx$VR`L#y$-M%;*z1cDhnH zcE(Mg5#UP3Tp0)#qxK_V4k9cqEU#QYX96Y*XT+iC@6qP_zvImJhe#nyrlIsBp?lH9 zYJuFi(eYRBD?CcDVp;eFF7PZ3^{H5U{>dfEhh{ ziA&k0^z0xBbgk|^CciH2Cql}<(ORImn|z;h5iBf6D6|vNp6>x>5ei2|X6-VrSNI(d zwQ6bvumZFccf!QP^@MVy?I$NM=;>zmT0bUh05~wcj2NrRER!Mz;L!YiJq1OjS_@G( zU`q!;V8i>A;0Z^mY=WbNg3$2`V))OY zRaaS=Ju?;}!N6k`fO66TUi#lKc>xq|79UtnIL|@7TH!YYr)h;A2kfMhMzE`1#cP&D ztg4C%9NJWQryJn^b0c)vZWUp2P=nt-PWyk7AxeRrg#vfhv#_<4oz1o7+9sVq;JT;d z=_m>tGY?gcJz8QzzyEFmNg2AB*VXI7yH3mEeZ2blQ>tu6h_ z%-#S}ErMQTa>ECw3CQCIE=u*;f?_bkJOm$1l#S~D>cEP0Mv~ECU78UZEDr+HBVb|@ zh7@{@ns!9kc&zf(q#F>Trek^{a2F#MUi=cUMGo4r-HG!?IZ&KW2tuwp<2N91`dpDkyMl}HDb3N+hUtibwG{7#s54j+*D)2;=<*3ksO{BBrg zW<*QT#R%O{VC2V;%SBJv1!3crp&;It1J3*7-g%un!S~K9?zDd~;6}72Eq2O;v9h!# zz4XfoJ133O&*Z6nO~xDuWGZP~=|$US{uq4pzGa%Pil|HFjigwmb#^`G8eYBd@aeVc z{&vptR7YotUv!S)Exw4)qa2aWk|%e;LCf{OetqG16nZD^rQ}iomytAf=SPG0oB8jE z&d*eOd=|l0r0t%j1#~|{;WFsYn`AAr)&b1xCE~MWxBT7S@FB4dYX@d4AG}=-sbr)9F;RXPwV{) zV|+j$W<(Vl<66DMtO-$5f1|K&JlaP-TKa%@8uS4W!0Y6^_ysWJP-4(!y*Gw$7xeCnXC@Dv*S3Y5k>s`{iK z3D-K>iw1mjr1EsX{;O_vq;OB*@ z1T~!&ti6EWqX$69s8F94_MTkyxTT@@Q7!C!=$o5}s0*`NT5)Zh#XR^!ESZK80C`MB zY|Y=QTH9aS1U~8{|LC-Gy<}enIKB!N)&w0i>?}PDS%T)!l=Ueg0gk45?;K96Y**g$ z!b$*S7}X4a@2v7-4O$uUqR`{`l?&Ws>0|Q2zVU_o#t&#oh(FarNyy(Iv?A9fYyUX- z3uPQdCA724ecv2w<3N5weW$(@rUOa8>_E?G_;q;w8IeS9wL4 zM$kuY9PCgkeAXM$EK-)DyD20HA!|T(2orN)mD{{mFri7vfTWaziHsCf3X~@%6&w_Y zmBBbmM2tw2ESmjLvE>QJ<{X7%hhj04z|$N+-0{T;o8OL-Du&U_@WXt*AkwObgnNRQ zCtbrIWUO)!eO=3RSMNWv5!A(>45y@F_feHse6nP3r!ftT_!$Eb0+xoZTxM+Fd!8~Nsd-Dp*)?OXM(?KtB 
zGo4M^TXoS-)tnrPQZ|R)u*EZ_f#}|JiFcyeR*o186 zii5X0;KhkI1_p%#2P*|w35b;bBLX76-(m&l7?{@4NF_~#!S|1oq;C=a(7QeXi~Zh=p?fpJT78bYwF5M6MIQkNmay{*&~pBkiBQK!SPJc>wG^5p z8X-#3`&S}!^R0R|$b=>Y>vE3(K%m9f0#|R>ze_4Pq{}El*BlgFSHs-evcyWXo{KyJ zaf>IS0b2%Z-nx6-+r!+X!oUo6;xEEpl5C-Y#WVYtAWJYiu|l50h*#dusJgvu`6;Oc z=JXJI(8bdU=rZ4QP~hpDLZWct;yNP680#Y90esIa2D)*|M;{5>+!Upg^S6(3=Lf-_OmZ(*pBsV z%yx#)f0XMVnQ%>J+SAeQx2Qg2!_&-Hx8`=wU}2Aa=0~uZnqx~F{hUJl$_E&F%-!1s$$+CkSF9@r*UUt`o=5p=cy-?_?Fl|TF{!ORMOnx zn3Zdj79D@Xw3md|wjd}0d7HJE81R9Xtn3hoINJM5hdE0I#xAzHF!+)}4BQwCxZm9t zIg6g+CoaywrDo39M2>KV8%w;l%ZZM|W}g5yrYHbx%)7;xI2Tu$8ef9VYC|nWBgb%U zosBJOCaiQxkMs5EA4*9yQWU8U13X1JbY+q*B2^L;lG7ezE#3H25UIg1At~N6dSuxBc2V=0I9v!qUc++y?fso9&Iu13&&?!`{!x`CQ84r7t4HPBtVdtLX> zEgQD;!fe)~KljK>V25dXSy*3Vlzm!cqA=c`^t+45HH%3s#XNc1_>QpItXE(4S&aDJ zk!{HD_`~d~_f17#uIuN%c1Tk#%%6;y4P}1uuYS2YmSfAj5aTzy$yPiNjRF@SqGJZ! z+;4u`DlC4b3O|LeDL!sr>F~}KDIoYPqPV+*+ST!)auZ& zZ8ke7B>nZq=F zx`G9vYG7pLxa z{VBW3avOQlS(_!E7Z-G=Eol78XPbJ+eUB})0PYL-&!!g3--vU?f^^M7X=)0>~qK6M`;?k7cx;DJa-_K@eJ_Y!CzgY1y&j6-J44~G|W2{Kx62jZDeRA@86F*Ly=s- z#q$ys+Y;9P(v`Gtm|xzyXL+Esw`$wMUl{g_+*P}MkkBpoDt~r}X}MN7RM9!!$tch5 z>KgOL!LMv|`y`Hn8q%!xZuQt5}tB&-UJ zjK7jB4%oeUo#4|P*sdqgxyfdixS&+Lwn!hY+X2F2TS-yw?Ew20Qlr zc<9GO4o>NRU*>x`p^X0jb@wz#E(;Y zu~E3YJh~F`o)rxkPk$~n%Kqfv1f#goqLB{euM8UifqnD0=+tm+sXwq3202O261m7} zMbv(GlP)j0#1>A#=bT8x?_&`}Qy5=nDP-WNceARm?qy0A<1jIDjIFcJ;3zO#i}vf3 zFUWT}WZSwMC1niSD3WF5+i-TYh_yI&N z{5TR!qMt2Yu=E4*Lso_f*EfH#@yTPRIw)b9kx-)~Q`dJdP=~k_mJe@%8qlb5lG(f> z@zs4RD>M>a9I*RcpfVE>wLpAX2(|SGUcq1v@h}NT7x-aq4jJS25?jo+F`j6?Rc3Cq z%3#iGxIiPesx3B%QqylC96S}(i9a2B`+q`Z9J^V1qYL~qTV3axO!OltLln}@ zejdMdNhVv_OV;1HZO+ zVnRVk9`n2R1Hd@RRD6O|z>*eI`M|jk=BrUr=Ns;B;{Qum!Im0J@dIY1(fB5c<9TYV z2Txx~wf#_~OK@=*dB7i0x}Iid%^^sI?y{DO45qeGWQqhfl5Z&7FsU+$;G>;FfET{` zY^8R3CZD;gl1z%@R(xj4mpqQFN3ZR54ts4hHPJ5i1aPsZ#~Y~n?*=9%szlOQy8oWM zB{P32IGYjyct*2iQt8c8Mha?!4H$J4p+0kuqoSg(d>~WgyoBH+o|y}rJ>uS0Nv)zA zDtdX$mu?UHi_!o*YB<&nV z-B>$Cm{hWGOGcS5Rrp7DUh698Pv>8 
zalV{5sGnOo-cK0W^0wLR<^NqGOx?y2pp;079=9UjH5qIz^(YPf#%++PW4kL&;~Sbb z3K{J?+D2##U%{uw8xey5Yn!6%D7H%Ebk4p)qCY`1wZa}diyu(E9R}b|crsdFsCl!4 zi=6f9jCk1|jXZzEw_USO6k;R#Nbaz!m+q(wydiBo`NFt|A1i!08LrF`2o6!`0h8xP z>~0dsbAB?j*REOFr{4(NU6bvK!o`G;x8m2Z_J&Bn$n%4DpPZ@w`)o~QIf#5oerP` zar97V(ZCWer)d{RG(TJ4;>qArLhPV%6rH4h8XhKuLd)cMbmgXzK8UL+IMr%+PE#xU zW}HqQgWN$0!|WU;3Ff-}3v9x@ zd6I#6&lm@Q6%l{fGBx5MYOg5j1th1lcgG7jl)Jbj+Nc>4P=h|cK!hMMadAMYf(;6V z$TLOfl2#;gyNGknz0^dZR(%pKIg!uYa$qZibdHP@%W=NZP?Vo!M4geWM(TugYRkv2 zjJdQZyx4G6hRCS^IC20Xo}5+&O3n^bi10-mvL0VB2f|H(>sHBNzKCi7mUBcQVU zUZuOB5|x^{Cr`o~mnM!vhLIYX`wSZ<7TzqWm34!cCoik3*fS?FjFIZL5>9JkjjbWe zg%tg$F~o*(H0n`%zNuv1>5|+#dGrr%L-)sIZxY$v>2V#kuCT( z;^Z+VrrCLCXb^v?RdW$h*Q|s7fJ{bM^ux-zUFPwV1v3_jNOA<6vuVhEEEptg%E5j9 z1@x%w9i8uTZBS+lWIoY&OKa=Hjesv4#3Jw=9Y7NhD0B1Dm;GfrwUzkJ03ggoU)R?4 zx|FhSq`qr^rk_M}!w3N)dE~z$d2uh`@Ml)*!k<@`7~*MhNDs}fO-)#e3rMX2%ohO6 z;bHAP@zN`KT#lC$mm)<;_bg=sQOegUWitt38}lt6FJw^4Ua5n;kxUA>9!{;UYgeVP zKp;XkFGWJ)Y9N&?PSUv&4ohouu~Zsp4$XaNP?;CU&+z#oj(%N=RM^P)&vy;dSHNWa zoAT+za^IrpF%@Gi;Y4E_N74JM|GBDzK1Pl~K{+RtT-j$#Ch+cM8ctzF(Sra&C=Wtn zc^c)E$T1Qq?#-YEDKsdX3RXB$OCe)!eTTYA#A+xBvW@i!*8~iO%Hqy01d-u4@tMR} zpACiNt?Cykv5yT}Xbz-(gq`36Pz-;h+=~`IH94dkJyzSIvcq*cbtdGB*$lKE9E~ih z9z2JlVz652@larhe?eS8ovtocH3umQPCXBlTv}cH^vIipdNr3e^K}CjlOUSRI7IE8 zZ|3NfNp4)sTGx?OuOYVJ8tD|u(GMH-!D)iaco&CX56Li3ZBQLx4*g*o*BYAPAz_Zw$n7n9_ z!`FU3`SOPervrcep;9REcUz@LW|Kr0%XBep`Qb8I9(5b<@n5kV%zV3!$R&?f7;3Rd zInS%5C1NaF!igy}#?wTAi@RWP+OM8N*=7^+CGsY(ZMERE0#}Zb2{@JOX*hjQ(kK%) zsvxbxqM1@-#R!~`l~Pc?r>MM~211+qetIBGDa0%;g0~S&Y9p_nrxuQr`R+>fgL5Tm zhu5iD2qV|Gp++2SS7!CwSF*-w<>MWyV2bq?lI)JWqmM|a0k1$Z%V8& z%jCpuwW^t7aH16&3@IbZtcubKyWZ9DV+e?oIr0)|BHXsZ&#keV2hJLZNm~k=mT)U^ zoe`q%Id7~WgXk|kAJRyGyrG@}KUTAxiS2)qgu{XLykmLO5`9cFkoh3c;gF!XNCPJi zBS#cXgMD;u1Ba95 z%ee|9D=0Zi_x&;hy&-2Oww&jI5xATJslK|gYh8k;GK1TGQ6J1WXYqF*r-%CEddg2o z%{48)*0>1#19Kf(34EvG6>iOEAkjf<>1{PA-zRr4rBY`u+Q zkypQ=_|j+a9H|4DL--~kArU6`7r-G`g0(aJL2-kyK9`?9>E 
z6>9+~fLqDIhla*hToc@4TRwW|IZu8^%V)ikPRH#`tA`kj#|1$?6$^q+oxAa+W?LmTDRqRpj=z8nsN(aQu;WtVUFG7aM0HS z4GlMmqU#fv!_y{TdCDLi)nsfaZAf0*i#_$07hnVS|F8ipPAW8mHOm{DLr`(gTAo@K zH2?RGB__a4+|3@i33F_Mav{D1X9DYLpmJH08q0=+)(0TI4u-hyRFrE{OD6el8K40S zhFp@fbdm_*3qKF|I?e~3q_jTt63V$b^rC7(nWNV+Eo9XI=yBL8rBX>qf0psRFiCqe zq!T_O2k#|)X_ZA11=QFKt?xmeV>9TP8Vl^x*(6&&qA)pkE)^pBJ@ilj>r-FP@sEn#ecw@vZQdy`Hkk774MW&1@`-(@VD`;V)K|bxMd6s@PfK4Z$e9&3Xhh< zBdNKsetYjR9l0l|_w?n9S~oQ!|7j&?W{f{QrbaVUwV%u=ZEW4dT{vhbRR?+zmH*WF zgb%%r8KW$#Li!vo_x>xLXF#7J%M@rMJTaKdV|5F0=}25$wdmesVf6pzZ4o(S8X~wS zs#-YRsHlLVC*+p-PilDQ8#;~UkaW|s>FN$|-~y%Gbfo|ZMB*kD%ZhX|%HKVb1^9#jFg#A5DGE^49(rBmCiOK9cXfc#=Q4GLwXHc$j=19cau!o)IAwHvP0!JXr$ zljthb&d&X&Y<4@C;fc{Z!I>boJQ<=eR6;%!hEhTn@ewfyAFn=}J{2pDws4z$|127c zX(jx~KMk340HE>7^k;)YhacY$AStKjzoh49eA^)p78QRS;Z!8pZbgWR4x={6WfcV1 zZ+vH`pw4$FOa&_^al~Gcu%k#Az+V0anCTSZM>#k?=N*uO_b=d){7wUG64waCmV-W+ z+J%_O2H;1&6{)=Jl++Rjl`&j;J>ud@KndJTcAJa0&NMXu3R!1jnm&?hBc=TLQCDw7 zi%?WVCyac~L(u*^1xli&Jwwo=*Hc+m=KNUn45DGS=GCmE6gbJI6ZDeTm%7xi>)?`s z{M=31B<4`LILKQS&)E3ii2zM41;(#+sSV6A8(x4Yq6p2Q0}J_LiJ<`5@o^i@dSzA3 z(HkmuunO|EBh)Fl*`d|;4VMhy=WY5;Yy-6_dQUJrC;h?OoWb#p4VJ#Cw0db03lL?z zPS$CO_$uIu(+;lv(@+_;?p(=**|*N{g1C&E)3-m%;6V{ux%;Hwy-iC-AS>HLzQwVo zl2oUXHduHaQ*Q`aL@b>1<6@4F`T)mFYPKYojc78x9>VjFqqC(hI4U z=LdZh`ll>2y{~{7lO_-iK*xb;3zTf_suusDbjI=~t>}NMQl5z39?u^n|_Iuhi zLY@Dt0-x=3s?7_r`%VSUKdXqA>bX>NNQX%r{DhQi@|Ut)e}$-^tt>5ZtJOf%Qh8q?DQuo*LAI>VNZwka;zm1uiye7Ju>08nqg{Bgp2Kc9D!Tk88;4FhI&dO#twHfe zLfvbpwq5*adZ`no8V1fU8_%6?d|OKOeD%Yhe5b6uNqgK^n0r?ZM-9_p(d}%aR-$Da z4lX)L`JJ3q^KEtSTu5 zb;O@)`T)s)^9P&?_ib*XRr$S&e`I zqdq37L)gZBePas)Kl0!66`=CN>8B&Tk`h(Gah#5ez-iCUcqV=F>H}w86sq6wegi!Z zDbPUp7xNjvWc4g8UHVf7m=mQFhRdO%RoA{GMzBv)F+T7o*F9{g|KIUFvA{v99SorqF~$bPq$uZJk&yX#ecY{l}X`jZU52&9(kpNg@5K&*4D; z`2|aB&Gt-0jtDryzt-jqFnBBz#dw$~9d2~z$6Slsu34MLrS@bWbiLGUh5l}0US0w^ zfEEq3BRpkT(oOuI!_tACL%Olj;Y6AP9=AAp*jBu0U==w7BL|(E{3|m!vFGg7X@6Sl z!y`=pY=>8K{UiZNzU)`8_61nSV&{q&(3Bv_rodFkA?ZJYk{Yc4&uwssMx>CCMn$|g 
z^Kq*#qZgUjrqzJgl`9{1tv?_Y0vnDrizBau%_gjbsKdlJKCyls_g#s}m!RsE)WX73 zhd`xzJLyQZ%&-&#aRTzWk7o?mM{3`Q+RADmNU8pPvi>-}*ctrbdp{I9R2=U)5LXn9 z{of*S5y76Pl`Byd0G#1hyv~{5CVj!cC1?NFB~M6Hb!&fPloA)Ah4WCHAXrFA^ACe1 zgIApBNB@|xHGO?>d!XXT{UUMvu++vX%(fEL`$ja^QbxM8x(6XVjP~92e*)pLQ0PBE zlIs|VToH;)-G3!F^$*E0*`p3x(0m0`q2%6c5LEIQ`yQ#Wq$@H)Hiqx!^GY6mx<$5(@fD1vm!X=M?7@2Q8mQDT z5(LJYl%32f9@Ugf*QX`~yM+)=De%eTeRoj8GfM*pn3z%p_o$^qRA@AM_R*!&A^4Pf zmwAFf^|A6_)rTAlF46sJ)38=N+n)^?6(GiZlHJW?B~ytB$mpJlVbgCjLo{ZO>$<2& z2$z;CoRih?JP;q+|&-|-DW%UCi^3-HX<9yIR5CM1=bbmc9cA!N84?8GTl zZU$UED*|UkJL;W0v|$s zD8BxUo#`+)3G1lbkuI1d<*1b+)u$FPf08FEcAqEu3S%hlL!LYJoBQflLHz9jkstR7 z0jrcAGQZ_w)2V~W0AUxnI0UA2uMY${F8IRlD>u>gT_`j)DB#HLiS`{_+uX%NPJ7iDsb9(wT}w6D6Se$;M?;n<*{*K5az-Q!$sDI*&0B=Yb zr``8$S-W%E9?H)bq2whOT)ZPXh8w$9J3yf+mHwXsRF37J@~6}ss4LmG>IHEIJ37UU zIh%AXIMJ9-hBsIMRUXorrHvab?Mnw)l~x80=xsW(kPC8q$7OWu9V*~t%PfHxE};5k zeq{g<(fh_K1^IQ*Up?xOHKW$v5Lc1pq2Z8DC^=AWxmQ>Mt=WKl@ zJIfgf&cU$%5jotQVvCW`01E5a3qBh%^0&$-o)a6Hn2OZ>jRo2b5>fZh#`6AK#)Ym_ z9_9sgMtgEx+VjRW`VmhYN6dU#(@9_8|4|%y>gAM3D%w1#ko(g-M9uEA6U+0t@ik`l z2&cqknFU6ChF9~{H)K;+K2ePw@e4QhgpvhJ&=Xpj#^A*Y|dFor4^Kvk}EtIulV zwPYeC6It}oLx1H+|Dg%#^1;lzV%;0@c~+qM zefQy0W}Q?}pdd4N_xUQy)#p_!Ys~8+M$p@7m>qb{d`{u(pmf&L=4tbX zgfF88e5nxl&Ck&xF+Y&Y9dGYOA$~k!VB6=ZUk^fVN91Mhj)+L#uL>kU^ee{KR<9OP;1adWn!)p~y%TD-_N+OQ@_ft~o5PM?3auIh zX{Kq=yPm15wzPBvP(M+kXqU)-tbg-D3Z)5S)dA@HisdD|Q~k(axSeE#VKoPcAfssC ze%AdkR43xVnb^gG(WJAg*;33Qhvk{GB=zd1mV+?@xjo9jEhaQ#*~wKyF%Ob%}9Kl{MpUe-Egiyynhe0Q(LdenY;_ z)Mo2I>Z5UcgkF#)YAR0ET6dZl#YkaL0K2c=u|SE-baGKlzXtW8)vVH?5=EWB8%1dy zcoY1}@pQIZ5Or~Fnj>KkmH9kLj45`eXeIUp0d->QU*-fJ&;-q3{f@EO(dd-tAxRgjwlXOuEo*WrZj_ zO!Pd`D%j=YA3`S7N6gkm6$4Wpv#H+}NRr0M?GtpoS!-K;pwm=~sSOMLRo@+qx)=R~w8oPq{ByM_6`w7$`;6b1pVDAeedNC|ee}f0SRs~}v9nmt+A+9CaMS{N zY=#Zpzik>EK(f1n?qd9$`qr>0B5ZH#wP@7PAcA`G&YNd`r8Z*qv)7b8_(b3C64UOI z`uQBzIbK8NovhvEjg6Bi%(g3@1T6PrESdpgPDgt5lYiJ`bEN6hX0z`Xe>~YSPpITk zLi|KY^qLVLM-=&akTQ$|Jk0#8Zf@~i)un);JFRUDvz*mI-xXi$0nOB#R%05QNydW} 
zzeV5n%&Gl%?^Y@s2aVl?hpkqn|2ialE?`wzsD4=YFl!~J#@;%}kgHG#IC$o`m67%w zuWzAYCvBlG{e5dHrA?n+?q)5F8< zHQd_o?(aBt!{rk%H5hx{m!ImECDONwkjdHrzYN2Br;SNEmxVuVW8Fc>$-y|5 z`8iMd$^;qle1?_WRST-+QvXm?DvnH!tY*{jAGQUyrqD(l6y#M>U*pdzLHiOSbt^Ce zo!`7zd!HrQTY01250)XYssbe0sm@pVY$g@(uA$n|XUPGwu)i>$u8#xIn*PX+&Zsca zn6$KotH{shB>TrAhUFp8uB0_ZBe&Z+cyaBY$tJFSQV81BRwbJhUd#DD5iCp$TQo8* zY^An;7J(PTT+O=hrtb7PntUeBavNnq&Z?~hV&nPkSwq)uV@6d)^dVm>D;B+DYcl2+ zLrZD*3jDtD)AGy@GZQ49SEl`orziX>oZosr*lt7Qms=_JVzE`CrZDrc!QWJsR90?lFU!8` zqfJ*{fk@y03%9tdo~-&1bw#w9z9cB~GeM?8@p*^)e7l09Kb$)8an#?}M{a3sZxVO( zBF_XKPvCHX-TX_%ErdPrS~ekk*OAi{m6?yDZrBf(_#3>#95R(*1bUS7QIYcSw$%oN zXJo`2iB0=(LtIxyIM-CZQM^J(a$8F&Gas4G_$xi57$2cSs?2N_00pn&+7`vhde3=Za%W6X4zY_G+GoUL;jT1i0`(f4ITxNm{B6>mIaBtn2zbV ziuSan|F)-jB}9QPj7-NgtsjXx)+I<0HrfxTh+-bUeNp$DLUng_!dk#oJIOwlX-{1w zL*i*0RpVyDlbw1#N6S?k0yOtDRAks;+q9sa>Rss2{$6eWZPcfg;Fm!M{1q#OqT+0y zqMj(48ObKr5q-?xI&`-q9_}#*n{m<%z6h1&h7|WRq#J)8jk+z&J&ffsd!fa06N!s- z-d_gGCdZ3E@+486rM_Z#S8kb2ykw+AJUn;no%*>e9Y%wmc&nXWBW%uRc5uO;5$MNN z%b{b1Q80`}ptv08WEv%#Y;I{rO;S9ZjOxeB%6?;#B5y!7d61;I=EsYkY83lT<~GFA zu(RX99Xl=RmA~X;8Zrcv{fji@H>PW`=0sx#R9Ce)llr4U7Y5yiLha{^L2{EBY5r`g z=VieRqRGao2QCqg=E~mbO+CR_OPHiPo&HYE>@-^ZF(pL)8|wN8d+PmW$$~eyPw{1m z`_wyBBK+T?Q_zdJMC*Vem1_*B%)W`WI{$FlG5BSv72blWc2S_5a^fQq?leaq=lG*XW4jaiSWU5FBlCO^F#1d~C{3)AAEVzXg6t#a=HUe{xSeW+@i0>l%_K@3ibO<#uh> zI|*6!WtlJMN_O%wU-x|0oKE;#qPfAc*X6j{VOMR}h&;E%4~G3JT_mP>yf$@?jJVG@ zDK*z13;||yrP2UZIzqc%{j8SwtYxdJ6N=67idEu6@Aj%yg5-~tmqh*b#D6BL8l1(d zwJIV%#=lBbdxaUQ8kz$!hA)IMbRxCVp+PaXM{29FRF z1-X?+ea|Ab!(Pmlp)@y^ z#LOtF@tfk=PwGNMW0;Rs3$X^)A)~q+z+Xr$>EdMBvxswJ&FVsr@{RM}$Y1L!uJm&{ zkD*2TEdRhIaoka43Y^zGCRu+P*rBAW+5<1OS%|ptaT^-xg)Q5 znl1p5HPSN=Qo@Myc+!J!J*n0R(SCf+Zrn(p~c0EgKXA$CDO4hx*D`>)d*6uFU zb8186JcK>H*IDluKit!cAV0tua&*Jl>yG7#ZhVqha~vqJqGi2&E6pugG&2+&B9IS6 z4Kod?$E;6w+L*PCXDLM^5V=nsN?Q+vTnRH#yXO=8vvHi{^sm|2>kP8Q{;C}RA=>W2 zIkQRdVO3!CLYLU&RwAfnEp~cWK!_S56#7*0=V^Wb)SSV*BHH#hAnJK$qp{%j+KZb& 
zxF8dMo;=6v6u3VF5#)W@!-pcB4itKvR6+%|M0Ar#XpYr`9#APKFv~kL4KodY<d+Yk@M^kSbjY!D20Qgxxl!q9{f9a_L??B_%Do1jco zMWjk?!mwfoanZ#Tm3~|x>9mFA)}*E)yj4MQiWaqSpj=#zLGeZrxu>|Hz>~S5NwIXi zM`e}z1$Oa94@&d~#;-C=FSu5BH24plih0BOpqZ$Kspeo567PW=As5Gftb?^Flm#I& z?~iXw?SkThI-8F~YNuw@A9$uv6gL~JePQN6&3kl2cMFP@{Wu0zkdO1#7T9ry@ryfl zj?&#&cUGLkmkhZ+w6koQBJ~5EYJ9hmu1AYFFWx`wYNkI--y-K9vv!y3@vD8rrk!yN z@C<)K&Oc=9o|hU{JDB}AUt*C2LYpkfvB*FqQ@hV3h{b>f^~bEwStZ&rnq^s&KdT`v zuEiZobqM|^WtMw%P)0_#K=nDg!phwdsVJWWm!pF{p`rA>mv9O`Q#)m8?PMyDD;nq$ zhLS|A)MS-AWl5=4|MS+9_%ST1YEq0rSt4+Qk-3;Hii}Qx7Oapt1&?HW%yx>;sR7@y zFq0T1X}+6mFH&%HUy-y+v=l#yOJX=!kF3k3HR*qbTdqh-Nuw}6BiiD+=}(ekH)O

OhL|^RUX?C=ZFsW&5b7?-nu%Lr5*kV zu?L=%JOF)xGP)x6nZT69_DYP*WjARuMKPuF#BihaqsuNro?oxVF+}l0sv;TVKXtbo z&?dFL(hpMlp(ti9)C0B!%mSMI`lRTgqkb|&(G<}Qt-wq54w-X5fq{{iUv1%gHhZvt z1_K#~;+sTez#bZyS5w3#6U+~rHj^z9tT2w7kli-+ea136G?>c~N&WD5TccvOGT`a? zO0min8MP*8vW&wOaz1C#yDoKgC!4uGnfFo=J~S({^I7?dhD+1u#@Z3R^WW{%K6Zq%pNjRt9kx^ahX*%WgitjLa<5yS7C%+^i)pJCwOrccBW2B&Vx1i2&xCTg- zt`PB$Jxtbv?-ai~TC8mfrUSq*G20?p)&kNi=soahrX_lQW_gIfJVm^yE+J#T(o~UQ zlH(6(@H0o9MFzz_v-PL+WQw>kuw+!g5^=kE%`=(_x0mk8zEEAKVEU}3#?zf=#3I{4 zXs4htTSG{vZ{rl?TzqTthJ`g)8haFFUTPEV$t^#T18(P3!c-pRfM9hcBQ$q)so6q3 zDoydQzj%KNr``95ok-~rD50GaI)f|Q<0FiYQ!&!M;092}_q?=}7LaSee;szQ7y_1@ zSEIQ9A$hh42M!(bWtRAAKUhrE+LSJVz>xSXfODEAF1GWP)dUk9Jy>m)@{C4*pEgn+ zYuDJ9MoGM7KD0bJR!hVmJcKT|9Ri)=eJ4*PU*q~Buw!@dqbPwW{KNf>@BzU`xZ;&M zi&s7?+p-gU7;cxb?*uV?iNyVd7aYgyb+VqiGHVvV(wwAvl@}Q~!pmErIlHe;{LkSRLKJ~ss%-Gz7%3#{`3`lzve~quB z7QkfHss)>nWMV+2`-sEGBgR0J*f|COi48WMbTiubq>GpClh!oQ6E=)|VO@tX3)I4M3ggn#OAEhgBQX5B z;TOV?pkmIlkMGA{sCsQtzI)o)NgxWiVTuQ4@$h%(2eu7HR2-wyRopj9;H`BdM?)4P#gL;i=IFPaSlgoLfo>8tCdr9jhAW_8^ zwY0OXYi-$Hy|?hNG+x3%Mp9&j>ep-OCEP1gNDR1WePqpxx4;Y7)N0;}!BAwCU}%@> zfkm?{@cObjI>9(fgG7Eg3N)#WB)Bd8fr!sPO;)fBD=bTv=Qjs(u%}HPQXnGEmgN;T z%KlWyT2x30i6&3H1@@N1)iw$X_J{pSH$+#3U>jQ@oY;Ik6#qS(#T)B{Qi;kHBEtJd04Lf7wnsdLf{}_t3S*B)Q8`T zKOwtXvy$Ii3d>0Sc{KS?fjpvS^D&$kpMuV)$oGU`A5H)@(Xt$+>9Slr#>nrb$MVJn z8uYxLKhmee?3Q{BM-m~UQA!Fw2>WhQTah^JLq7-R{t*Gv8%(&A0PK_z4(A1$#6>D; zAkS|~%mX|BZ=z<)9=+szsP6=+7}yj|40#kb*uu@I3q?o4e)XGZ@!%M!M(`K9RE-)7 z9~)TRW7P>0HrxsaP`4?@u$+9bWvC{sM>V-NU9OKn9u%cU6+(47N%P*Eh(M`BBX|o$ zq7HmGED7lG{JoW<&5&ykEOI_tlFw*Pe z;nBEn@y=jken7CS@b9G>w2OlLE5ZnvWiA-uFMQvkEy~F7`Iqz&v2kmg?c+Yfq5GY- zn{cdY^Q}5W*0t{@upK~BU0Atl}F>yr6( zDM^0uwq!KwIvNGx(7|B7uoT_m+k?J|nGZ&_{hp;f`I~JqM*F?Dl&mAc@*(v#+q+y` z9YW>{Cyx_U0b$J7ZA2vI3#gupiKU3gEvq0@VFlLoWd93J(TjLen&$4`aKl|Z%fbF? 
zw|XbXuL_GZ!kyU7IPXwB_z{jhw7TI8zsOF!;`#Bv*nZP6Ne=cW+8bbh9&~<#&xD6c zC%V3171=-Oe98EyExwZY45CTj z%~#f_OtHPL>QUizs)e~5Ax__eAJSdv1T(>a2W6}5(al^pwt21}mYje$^Q#`NdrkOT zla+fwC-H_F{kKk*4btG-$=y-CC!C(2f`2_FochQjwr6k0iJ{+?B?z0I0h4_l?^q3P zB)EpQb?Ts$c#Fn-Wziq{{(7Sp!A2|bc6YxrVXcjS#0|k22^!f?>(7o0CkXAoM2*-6 zD5N;c&?MOUbF+f9+wey8GtaQ%V%Qe`IGXdYu7EW`ioT@?Pt2~KLcE(An`}SD^jtM4 zIsu0Pxd-@Uhn;e)E*9~*u*^sm8QRtFl?G)H;#x4|1}D&S*l5&9k14k)1B1-vqrEAD z1Ov(GWK9~>uv~*s^-tgT)^0YYZ&7GQY|k(zZb#aqP!Svk?~RIHVmN9j)J$$e`;Qjk z^62`GP$TAyiL}bx4sZe|mn&H&c%n6$X<+F1wCSuV)^PoyFL|{iZA4B`S)j#rG@KQ` zM-)jrzG!{!tj>60U+!hGtcVP%fZ+NY2YP~s#|Jngn7w6jl8~u(CG^mI9DeNk)iC@u zu{$x84~B4z2kEGG1{4yNSLdH75$9)9NWW=Gb;i)hklIkbwSE@(O?4LoOG;&KMxwT3 z8OlJ8nV6J<|FA(SEp$9kg-4EMNS_Gl>Q1*>+xyFI%!ZCOz&S7n)jB>BI2p?VgwW{x z4d09JI^3N&tpFkMCa9nm6!et{F>MCQ%#>(Dj;^y>MIfClAXLQ%x`2m$LZ!yk;_I#J z5fCe&8@uv*X!CbaKMmK(PC*Kc679q(;;vUL0iDha>A|mM)w&!_)QDoYEbcYxXP~sA zM({g99rUe0H*9U>_Y4%>fe&cdT)4XzFmdFH5Zvz1{t(2XfBs7_T7%~zoU>f3G=F_$ z(vxyg?8%Px>RtZ&5P?k>-lU&&;1@evjx`!Ydu^o7S+NUvaJw`^I|z^pqT7Nz=L3oP zu0cf^CehEKP0bTDynd3&?|$KhO~4xJL}K&B;7jvbb~*P+B)j*x*TR8$N5gqe^a*Fy zEu&|$xEzi8*pZljdy*w@MJq+IgsPmJHnY#kT^6WB0ick^+8|hs9MzCs8`32M?odL> z?*!umc3{AzRE}yWf(N|&()A%whDbV8eQU(B*DtDV+XV4BlzMwHU#w%Kds^23PmA5J zC8iNSRZOx|O?Txrno}v$OeSE67BWHdr7DMXP!Ca-V4oOLNmAj^+5OeE+6jKuqiO4O zW6c&T*s-E9{~2!>tw<(#*P-(R2pF# z3pH!KO1)mhF)e9e`{cL^m7a{Ci5V$)s0$DbWim0yIuey+_#gLNU0rtX%O+@swQCsl z9$$~@=p2BwTkK~OjZ+I&VihCBM4`iv1wdDHvnL4XCx5jaC-ZJ~|N4msH-lY8V4eFxGyN`NL*&#B{h&Ny7j-98?ScLw{;Ldln4+*D=%O zgp=b+0z)D3qqs7{+tze|=X8qkE@_J9f{sL|o4J;D&RloSjNr*9atcFjiA;s4?_v_2 zwGjg8`MADC4!d?cbpYgdB>tAb9cO6s{&6YAxQ97<2!jbyvFHCGvC5>%!zPc2L1Kp? 
zdZ z%`h?mG(eY|p}x&fJTRO`@h6;?$bw=rGAYHRlZ2gBi_ZVHMZ=`pKad(Px(5Irc#Jvk z4BOBcRle;ShaV*~;%qW7^w!B@nBX3@6F;GRyU^0>b33r9AzoU>jwX36wMY^2G57TV zD7z^R;7SHxz(^~ReVcn|Yfyb3{ybgSep~xs)qQp03NQg|=v#gLQGiHp;WObK+A(3g z1-i1Auh4nl+52_hGSnum=1uzv@8h#&zYWg8C;dhjW6d2>o9+c2*v?*(JqTXrq$tcl zWSNR@Rr%9+A*%_%w|?doL+T8|B!Uf2F&Z4_wV+Dsp}j!We&FNW;K6TT^f}e3!beLD zCFbK#wTuZQMF@5$H?+}yK&f@8@4e+K?k!=dF^JdTnKjmRti=*1mWnYYHe2U4`SoVT z@%gUCLT^3*E~6YLWi-LVc;)!8L+QV!X}HMMO3zc&*B5c7_Zu3N_3%p1U1i#*x^!+k zK9p89ZOf^abuLuyjGsg(vr-ux$TQkX;ULZ3qm(~WLj5SzVqBO2A2?FcF+{9&QK zP%~4Icmw+(Y3(PiyvS|nh{F$9e!oGee+DZH4fZ`lB-wUiwToK6DqF}f41Yld(@7f8 zpdB3eCsB}nL3deohGWK_`QoPk0+~@az*Wx>BSSKs5P^*oB#g2@A&BVCvzV8HRj2D+ z0N(96vJ8^PiMnV|2?1Oi$B}+rmI-dj|I`-wna*z(QIcne54BelGi0V%O*pg`$tTqJ z_l$0$P&U$FrsG`zS%#Mej%vHz1K-b#S~KcB@e|Y{ai&WH)X0TpRhd3XxF;tDDg%2& zhB{df7uZPm`=RBel;VrR&b};uaq!wh#a11il$p)Qq6b4Gw72(>wpyjigiD2sX{0+? zHq}H=I@MCW8GTAqClXb(UM$fER)O0ypYjpRxy*VwMgvUN4YQhcncR`jolM921Y>-rI+_RrbaL*^=JRr zXbF(I;JhNUfa-z2Z#F$#T!Mr?7Dp>Hk#Rg=9vrp!*A#G%NceU&-RH$!e}Oa zW%cVd00bKonrKLVHXC)Dd-P-eyil<0Z4$%2b3^+}jy)1cpZYE}#GVYfC?|46It}(Q zbdwu15MZBhrcm}JDxTLw?ql`pE{-9|J+L1*Ai#_!s`0}>?}e?(jeCP;KPSU)Zly43 zd0l|oWTqB@+DAU8w#T|#4jA4^UQFs{m3HT=8$iR1u zk89W4tfxBIFpObs{H|Kd^@cEPJP- z)=?z>h+uuzC~XKd-ypQbCK=Tw?DxU*!o6lhSr@^{1WS&(&uK30G>8H<*=+j_phdjz z#kxeN+FDdp)(JE~1Gm(}^mqU)p87f^qwoF+=5wUIiST8`qjnkly zm^p>HxFu0cy-g@HFo~M^SCk_&GZMnmh`#Qz3_SR@Rt3B`Qyg|WW`h0vKn5=L`>v=b z4b-8gE%n*8xG4jB!p^_|q8}FUi#5(qq@#o}FR;A4eQ;&NO;h=4h|mOH15+3%S0_Ir z+!B&{zQo<_6CMl${SAfrJ+CPPTKsG3+Q6c)&L&)dU|+MJ+oH!8%7?*g!SnZj_rOaV zMIf8)_m#Cl+!*U3j%NZ(u0AE*#}V?-vY?mE(h^f=m#AkOhy@z>(EtVvy6@~-koIj z6F&D2tD??wV;1M(ekPeLN|L^GFSZPIE=tX}CzF+C&?9!W?D?ukVUkdb;u~Di@4O

6M-!!Fp9~+Q~;GF`2_x{h7*89++mIjh-80&dFiTze0y>KxbgXKH|SJW z2Lvqm*|P7(%VWJY`UH+=nx?3D89tz`x%$995}0=KX*~}t$5~*rvwT2Q$}p98K+=ROn3o__=xB1`(x=iMLscjnjN=zHm6gvo1XL z^?iz-@+SD^-AvXhbz$rFxMs%V6C-ZnNls5G{2-nhA6V7A1pE2~_T7MecpXLBvy(EO8AU|DXavfAC5Jt6N zO2?X8a^R?v$LipHx5y4Ar!N>)in+Eej-289XOY8TL>s1XR`=4${1pnFllRzIMJa2``|8$<4VPy zjV^|&*=k%jf@1ixU)rCor~Q}KY}vBt1gGW*0ZX*#qDD)YUsjW840%z!-oqJ2?GYr!ID$O z?R2%;!0~1KFMdtX6+W9fO$@EpjM}eU;_lWs2|n9e7`}L6P@L0b%s@axgx*FwQ=9-!1Ovrp;(hn2l~`~-fOIR;%rF?_q_b#XAb|Rfo1!h zG{u@t%KZh2*&5Fa<28l@oBfBS%GboSVH?+li#d6rdOdu0%y1}Rax~KtoNGIlI8VbUj72utR#qtFmmU9U(Qtc#Vyot-{XR}}w z)PLQJmafO(GXLBAgH98e*P5(hfTU*DI_%#_bXxoGq<|ebmt5j?pvnTwV&`JkOO#t( z=+Ps2+vc@TDdsW+&D8%@W%G9{<8NSLtOj zeg-N5am}la0b`CJi97TmQBX}o&|2N>dO-hp>etZORk4c#(00L+Wa-csF4ur)irh)n zcjwwlt4mbUq50PSRHcbJYFs|*v1sVHFtzjQV@kM-?b~hv3~|5(c5`T}k+zs>oA*Ac zYGW#BrzOi;lit@mvWAjXR??D)uY~Q$zwEk=Z1NM?%*i0=stNvyWAoP?uYMpjNrcyQ+hM%G zSy6KM#dpdD`c)dg}0#b!sD1)ZsM*f1LlV9b3jxh^=bEsafsO`#Rw$#j5;vj!o z#xj?pMZ>X3nA=6jboT7W+>?9hPJ6Qhv_XK=)2>K~zX;%j5Xh?&}8ui<%` z5zGtre_kKXll#)pS)DUQgI#Nr?<~;$oRcZ`U5ap^?VH*=eX2RP76&KJ>L)MD*wJiP z$9U3{s-e6%nl}3QR!hET!fh4W@^F5X4c$lKQ+p4utVvc;GaGN#ls z!=SBn9;lXLV`qyG5KWZ2)%{gLsQwMC}*-%Y*W%W3x$R7!k0FpN4!ufcU ztSW^M6m)0Bb>VWqZds=J{H1>hsvVX4C!%{YAJCR<82}|-Bj=k+C3Sx#kEV!S@BWj( zE=8qBLB}y+_!?rr1Xq+T-lgyz8}5)iMv)Cy&MpQkSy-flXL~(c5dxWa7GZ67eg%Gw z72pogigt6(U)Hg*G|r_@43YP4rko)$`pANom9}KeVE;3b;l~}B+0r77c9q#uaDo+3 z364Ecz?YmE5|N@`zb#@%eG8UILah0hT@Xitq=y<26YQEWN`nw3^j!4hZ?m_)y3NIk z7aWp=6!I_EZ=1hz!0Blbi_Wg{71!pLHF6l6vZ6-bJ4kN{;_!P^@Zo4X0-_pLm46~l z13%U|H1(S6caPU}vq)-#beZzRV1j!62wd_r5|Nc(;>NMPU)vYs#L4s5eiN76L#eR1 zDXriJ%(uBJQ6tlKS!z-t%XFpdr%;i><>@OEN1k?pm8_?r^zl#kB)1pkJ<=`K=M7Uh z^dEom2D|kO!XEUc>$^@&hHhBl!HDEf63;zAm9haoq%l1X}5(F`>CEOAOx!W(A1v=kz)#Z>@pB$u{gw$GrX~=mP zh-^kwqg;~^S}bUOlfU&gXS(7wdNwb|e59|$~fI?mdMQUAbYa^UEUbe z7={idHVQrO(rs3W(NCBkK6SumE|_Cel?hxTT2#ro${u^^28VXJI>dg*Y6IFn|7{duFZ=;gnc=Nw{)gS{il<<# zQDnPpik98HB*f*7B0i2qdTB;0ynj}!@9WNDDj%(i>EY5t#_vhy^<{o9j3b%d`JoyZ z5FZ4W;t7W0sonU$Qa 
zcmY)aIF(~QCw%9pe>9m#UGR8xxQK~OHYi~1MAG(YY_S&v*`|U-(h8E*I=gKej0HT< z9UbNlP3E*h)P$CIJk(SkDe|ut3(?hkSdy>cldm{QQox-^c0P@VB>fxVoOoLgFy1K^ zKXoaKJR6)n32J%d2i^0p;PbEW(ZzA4#cwMfRjjuEO8yz=UwVL9;!zoBqKwOgZdIm< zDk}K)=QVmobOK(mP<;ip#HWQag@r67xJVoi@k*2-D^rIOQMw?5#l-O1HYro%&m%?h zHE<&-R;*yQyduZ<5CL^7CFH|s%xR1CGb24L2U>}O{E!<-@XL$;2-N(;mZtwSi_hl;d!P|{2Z)$enyw)#H6kA$Bm-w9teVPYmN z$Ot3sMTh|1Gd;gNdd+zpTKvU%#z~soP%d<(OV*}JWr#2#I8SZpdrnwh*=1_>yU>&| zSS0hhhDe)!`9Nd5=OR3fvE>GVq6?i@Rg#{gPO+0}0}+{tu&sB>tU)GIP`d(zQH#NA z(KpaFJ_sFLC>mwzrbt^n0hxyE8VveA?^_@KJ-52}&=%^@R%C6ZUD?Tdu?Htb|2OSY(OLYoipQ*iA#BMU}mXvh) z)HIN$yjv)}SjfBv%Q1g81zU6^Ug4;H-7pwhK^Opi{F?(-f5JOXh{G%Q-E7U7xY#;5NM77iOq5u8x(f&MOlmf14Wv;+uoXU#T! z-8N~;^40l)-)o^4hvHP?#d{;fb3P?%&TkY7%n%t{DaiyneWNdk*F`XKWo1=hf#8cIO|i~e<{X%t!%pB&xJScnlT z4aO4gk35{Keo*MDmU@nUqO>?zQJ`3W9{ySqX|E3bl)lFYx<>$^dvd@SBEq7W2#3Op zkLAutmS7=1_p246(HTIIiLNe7!FjP8htQb}YW7dsgDnW;CL5H~Y_PC}HP0P@K^E}V zQCAncHW0ZU4>QpaiI-3b82W|Y&j}??fqFKy#4xnPmi0ooLzSE2UxjUv6` zd{$GbCAjFXBzUJ~<5m36yj<@cOKr{rza3bu@0H%5MG(;YEnn{Df#}~}(!B=?TT51jxDF_U&75S!5=KH^Yn>TxLxdU z^rIZiKt#LVM^LY#z}(J@I;wUA>BYMBJ7v4MabpKfrcqgHmSd0^+P~u7Ll`0!MQGP~ zc$z8d{}5b)&Rpvc6UygNatx~6XOD4bj}LxgRuStpYYfkoC6rmDyP)q_<4fddRsr$r zR@A2^b|q8j65Q3`*xha5(4ML#vZ21pVFW~?id5_z%c?wuB;`2;@%At{I8purb{WV) ziA-#c4Wfo&ta=KlAh!eK7UYe8TpZhjG5i|)Bk0v`J%5o99Aod+uW+klM|#gvt8M7N zGo%J{PY-tncu8E9{{IQ;>M5Ab3qW@IKY<_S9)+-PdxrwIqV~C8ln#rc(2M z1}n(;b2WH;RRz{C6hUY?W01g84%ScbNTX#Y|VqJI!Qs#<*pWacoeyR%*YM2-4;;F}Y8u;Es`u z+2lV-Vf&zDh=4kT8@qR=lo4C(fp5u7Mnmj@KIGzGger^11QxK0J)ns3DA6Q4NKXv= z&_qK2sFlMr3}qHtt}jv_qc*$5HM>;v3cw`ElRwxvwww_x-SUfAVHFL$_~kv@>rF#* zBh&XvT;g4k4;YIAk$e6bZvNMU#Ab8E0{#{atL3uEMvHW+w&_OA9J>i1=<1<|G@_2=Mx=mc=SXNlc#xza$(3ZuA9I3P1NanT&7@#skO-XjNG6>_?Ot# z+g=$CErmC+0Rw5k^a^?=i)*AisVv3W(2e9J=U~NLNoKV6$?i@5T39Xl2!F8? 
zASux6Kxy%6G=8CN@v7Ldexs-eb>BI{-LWP!wVYuU%GR6^sh8UHkk8=J5$iBGFJ7?0 z%b(k!n6*KikdXb2al~~avqBo+0l1`Dkv~h^KYZ*znU?y+9tG9ksNu-{vzt51;35ju zv;VHNh@VtVqyx;YhrD)6D{ApCIy=vDK+n94sC#*z?LW`ee_~4gOOo?tcR$INv1c~&auTU$-;#AV3iyXxzBNsgLEbXLeEZr$vF8>3*uB!46 zd6-QWx)(S7YnVrinBNEH2cS*&?w`flQ};#nJmZK32Q3hC0ZQu<1NPKQCwn!GK3!i~ zhgg&-d}{0VcTZg#T}o`#sv|2EdqFURxTKXUjx`Xqej6lfNauw8QzQC{I1}+ZTjx)N z_w=eF=;3U*&vo%+;fiel_EU-4mL`vz0H0K04aSqj4U&IIZz9%1b1VoW4Fv%pZ7A9L z!I5@70ap&)UZlqe^)K)6)=)&flJ$df$>yLnOOtu+SDiIqFV?ruk(dnR(hj<4FGRH4VGd z5*aL|UT#J8G5A&<4|)x(gnnm4;FJ9a8`6-)U+C%#k&qEzxJ`aSdQGgFAJ0hXE!=-* z%oTiHHB|l@$lAH6 zzNekDw9J{aD@S2=MLDul6+TakRPl!?CxTQOK#B5LL(8auFmYbw zO?T_&9T>GsMxtS~5F!v!U5pxeDzd|Ut;hNP?T3%tjZkWClYi15Drzt8Y=`zg1ms?% z=Mihtm>p~`65|0!9lVuJg|pr9;^$Igg7GRcr5T5{cQI7>j><8sQ{C|?2xo@*fdsA1 zF{jKn6C}{VVsOGiNJ0elQY2d{Si&f>K8@tTa#+GaRB^Ak$7mrZ9#KbXC2NCfna z`a~(#Pf7{#F-L220uK(?e({AsJTE0+fr+P99uk6OqrRBsuV0N(2faN{_|$3wwd zp&|yrMKPs)ciWirju3$nGl~ER1~HEHvtzw+w+`h;7|U}@&6PDYQK3b3NwK`dRdaNls z=1}ZOKzim^d3k@A5KLa<)cxTKCkwbe!QN`DL=2sm4;t1L2eqw@41ZWO_D(-2m0SRy zH_O`=3vs^~4chUbZX*rv!-!raUVKHW<-d`6xq&bBK2yfM7qbvvn_7%d83zU_=ysg> zlJejLlSo`m`kMfOK=2m_{Dl1s>+cVaCrHwDnILvHhF`BdoUWWZB{}Ho*RPYaQwbJf zV70M(aTRHC*H{<&P}esEX~#zB`-;S7Vf9`h%^aKWeJa74e~q@K{ks$G=nrEoDD}fX ziODl3SpZjZ3faZ2enc`cZ(naJ@4o+Yg0oTl=KHA%g=(F0%%Ylb0gPu`)l28nyOd|V zAJKeVpHuG6fE?Y`I)nan74Vryvpj)!Bs6kw6ojowpcIH7mdUeil36eW<2}Xf-Hhe$ zp*pF!Fy@T1{&TAO`^66@Iy=ax9se>8by(jE`M^nz?!N&$F_tzsZl;$ZZjyK18aHE= ze{d$(rkL?mi)yq59j>icBDi+VFe?n6xar#YK(!r7p zAgCuzjEyRsHvY>wj^%FLZIaTFg|1V(A!Df5_pM#b=!}-U<*ZVsqrZ|Er)lgZ(kV** zSb$0=)xe1P?id{0e4IRV1`^M17qaBXB$Yi~N<>83>q${%;L}D~{90c^C$|*y(1U*i z{%hM8g^xKEt6(%Mnce@$~g6rfE)Vh8dGSie>VUBZujsfIJbG zY}7*BkvfnhGIS>n#~+4H2!tiq9&Dli!wYVS-+`s?9vFYE8mT>!e2)XNw`x}yY9aUP z@~V&rKbT8jgmA7Xi~*`Zj3;_%HpJx#XGBwt0P;i}vJRk9$L<_VD=2I4``NEAQj{+{ zMs~f=X&?`#L_)cEVePzXUGu1fZN)r=@p-atQ$)$q0GKZHw$c`Q!M{{@`4iwp(aG)Y zYE<68a85oTH_VNmgrVe|;bW41d1!HySEwGr`(iot8st|v6mI~)gh4|5U}S;!3QC|W zBS)do0m<=Qm;@x)G84UDB>aKs8_y`I9F4FxjNBdY?JhkhCX3Q}~ 
zvY#IZMQISl%fDrbP)~%J5Wt5?tgKJ^2g9&Vx2(gN#uWj0uQ$)YV3%Q!Ll z9*|uwY>VY!Yd*fh^}2&`W}tHAEmuE6l7;d{LGb2m3;>qOaglx$a%Iz1*Ashmb5LFn zxFYtZ3OS##Oscb?cFhWz{g(#OpM}o)f&e6!ac+zmI`0cp;f;~Ybw%dJrt8R(d_F_G z`>bXscF&?>u%_Ipm~!=xVgu0HACnbv&WOXexGP_nT|%@-5yVY?a|#(?gCF_(Y?qJX zDUH5ZOLw+OMCwa-vj0JsQW)O@2&@0vRHLGPr3N%zy&VAhMGNe*`bYpyM?ac_FF$j# zQSl$gHmkaHhg@%0&wxVg0S^R*l;@OLU#x#%Hck_}J)u?6y)ik<=#yuRH@PdnR{SAB z{+ZEGS9gnXBi+JyO5?vg%A_e9f>ICjs0|WEco6| zDwxZOQ*F>Cs~>gBBE67tK+sJIu5;e*g999 z0Jx_ypC6jiJ$)2HHTDE*+P;Y=wq4J^fnoAI&YpxF&t}Zi2dt0D-YrzlElhA7nDPk4 z`k9U_YAKIZKeGF#jXVZKNG9hm#eb^P0;k(Avz-v!LI7TJx*K(xyKSo}+qs1QnVXIz z`D`U%6!u`8EidH6hs2KEXoPsvB8PJVJW2~2g^_w3gPA#z#{}`^9UXW;^|rxYpilZN zGh+ea5<;8+$OtDotmzxidU#TUF~k&?Ig+coghGpSlJ+!nOn8dnvlDV*O`UrG#JpL} zkE?m5aM-#-6V;&tV>c-ud_U;i*d__WFoD>0Bc?1c^DG>+0D=xL-NGu|?NNHJ?pvWQ zVC6L9lR3s=uy$U@RdU}d1tzhLT-b84Njobzd!E}Ad`mVW?9mN zap_#_T?OU7A8PMxKe?#~sn_0l%0Mu`xSO3il@%@Oy! zpe>K~Bxn@@jNv|W;DiLCxQQ`LhXB)U_CNT76)2L4rvB|gTV|_HL5e_TD*-|LEldJl z!3XflP9Qb@!3mT7|ClhVfPb2IppNpWO}Dep>@~qA*=AA$9qHq&XD+$W-Jox%m7vrB zDgL#+&m0MSQIbhS1i+7_EzAkPpHImdei}|sEetzLl`^2onWn#`q42Y=MBItu)inT= zEak;q?x|E-p2%)=Z*%h(1R1k6fj`FqE*@3TuW@)Mpkg0lvJs(d?9!%PQSi)6XM0N$t z@wD_{+>KT0)Rw-USmZoTfC*f(A59VazrhnO0h3bY0n$*Dm0Gn%{?A`DnY8t{vt)zZ z`o9%qBJq@cgc-k(5J8izSzhhA2D7R2&rw2I0B^S2&QM|}!3pAO!%RTgqLoUPcu{OH zW+J88l?H!5iiCQDNwNo=kuq&;romGk%;L9?ak`*PFd6lFftN^k*U%SVyD8%Gqt+1* z5ZSzJ9e`P=h6`3+&enHGytb&?C}f#;`S)MbE$?q9m*CthlvF+4au5-JMk-bK-d<+` zdpEz_obKEEzm9bIJr~7S0edeOoxfdO;|cq`Yy*21d%u}8@cBReRrFsI^7(tCIa1(m z__ACyktO^Jc{>NLe-4iq=Bf{TdpMlgf9&BqTJN|Sm_P6GeL7r!ruKh*`~Pgrjr_m8 zv|TLC?acq%o*M@^fCLr(c}ql|ICXLv9tw(#;D7s~1N(RX@3jAZ|J!Z*R9nHGkQ=iD z@4#Q{q$APnn3A0Wu~BG?#tlnCE{do`-}?J|_ZBib{#M-K8j-+UtEo?cYhJWV2r>Z% z(}vq%$#j@URAMJh29;dv!69-c6GN)A^v&(j3-p#&r(0vO4%1P1W2?a`{n96r&#)i9 zaUMOmO9#g@KW8S;qQ5t%J9}_ud?{-q^>ctf9m4+zfqhmx$&Mra^y5^Go+7}Qin#`@ z=)$(wBZ$gpSDAX}0PcmRzE-^g%z#W$a$Z0S-PA;bjb-HTzS*eQ<4zw6LZT(5FY+KJ z{A`0lzL)!{#XZJyFq}ZljlK6>!MQROmB7J+`s==?Tb0`6XJd(71l`|Xv)@qKddV`r 
z9`br}GJmXCAiUi(I5dPU)*HOlef`p<{WJrG16Mtr5q(%!v%YrDg;EavN0`s&UZ>;q zXDIiJeU;;yO|d^}m1xV`6UdMCKn#be9bxxj0Muz z6rY(T^t5|(xjiE-yx+*0A>v7;-~_x@;D^J_cnW!KxyqJ5X&AwsW^ z4!hUm?Tm1yuf^uqu_0f0-Ia;nGm^#Uc5~A4f+=WOG8Sh%QJDtZ^gQo1p}b1<`-_^`5cI&};1?uShM0v(`P{x; zrw}Y_gcr5%w-9621%rXMan@o636v+hH%a@>W_Bt06f6CZ-wvNLE3%ma%* za-0qAa*dWh5sx3j8CS5AounFkq!LtZzb054aT{FU?rZVH(CG8RJ`<*$66E}3ZwkRZ zW;*H+>(aD*^RJprzq?vq*e$>bmMi;h+BdY`3ah9!b`@ zQKOC0!spca{ubLW>5T4Hwj`peN@scW%jIRy4OWguM0+r+`;=IShF~^M>7%X$rN<+# zOs4%qH0`H>&=d0a`_K!yNoAs9RC^@UvV)o3ehYy`-=zUa6HzX=&Ttu4|jQNdiFK_XJ5rkz^Z%rRS{{AAWC{>z+ z>O!Lyr`G(ng0oP}ZEPW^HSEe!q7EOIxF6@5sIs6vKd&o=jkOckCM*C%P6yiLQ9++a zE1gpqsZ4Z+KwKxlDv2Bt&WjvO>dN{v6(;j&(co&Vk!UW@;5mxEN#-$(tWWo)JB@fc zO-`R_+u-2ZW3uv&GxNytXH%23XK}V&2{eA{$1HW2)^u|e|0snQ?=sSzOHt9Pq@`i9s7^22otI#SW2j@P5hqR8C#pwrc;L*uJU4@2#+5 zUL{2ta{HveaDUS1Wf?zfLumN6^h7J*a!TmkNK8K&!fjpVZQShNph9qacVg`B(!bs$ z_+Y+)`1>o8Vesl7OX2Fa*Y{YUR@@35JGC(VK~?Vgj>}0<7-hkJd`EQ!=#nn<$kAfa3Im-OM)u?`eEFp?e%t#J1E131$3!yA&O$c z1EM{W4yOYTzktsg5g5uz!#MD5{C0D-XfN-6?bQnkX9_+)>?q!U7V<|#m2r_v-$&0m zP5(uYDKyo&@{_)mzKyA!{fKJ?#zNY_t5$NK=uT<>F!d`J%dF@BHTDirmMzP=aM@Ou zZQHhO+cvsv+je!K%eKufb=kIEfA!w?ymRip_r3Aw7;DXi{37Oz$c&LQXMk&D5b@j@ zTRX?~X+jWuaY~YlSDAqh^Zq1w$Z8^U$OJ_Ng1J(2Q3T1szYmSvTBj^n@QNLp;x!UF z$ArL6w}=k(mu8tsKDKU+#nkt5W}3UyOpp7H$w=g^U5H7bQ%@(V;gM|BFv+x>Tsk=_ z>NP>=&+%xQtY9-ttgP-wf1h^OTJ3ZpX=Qmv%EEQIMZ~bm{Zs#6E)JL_@HY0ZMCcV9 zKe6Cv@4+xTd()%u-)+%X{LOm51>GqE-2J0vH#~ak4Y)%uyb`zGUIrnZ8E@p-MEQ1b4hA;deD>NMdsks z@RoCoYBHeP(9(RPI3PuGs#r>5^VH=|ElR5Oj%HEy4vxm!iXpo@;s>@F;Ph9@F9r`0DfXEOp*vh z(`H2N&lwA%vV@t@S^T0{9O0F$f^o($VcZ-IuT$<2l>~q#9+I~6+HnzGd>anu>LSec zP5K5vo5~(1so20W9nmEUHkd9|0#BJLZhLeZoHbtK8bA6Zj0>U2;^HZ@ z1~hDvz@x0>6Yii*WLI0|g$<7d(<_@Iv=c`gT@bg^Y+mq+@@XEvC1Ys^byqT zfs?NS0h^BjS%dQyoEjj3ar(SB!DYQ$v`EK$w@bfLob^(Gs*dRahpS|8gfOFl8ZiTwi4Pu+e}JL2+VeeFo* zF&*;o$NAWCuhjxW+j$uPL<~Y0EkYQ<<}t<91cPzO!!d#nV~prPw<<-TW4W_ULj6J< za;2U_c;JYqO^GqzMU%nMHw~x9bT9l>9*pf@auJmIXmUfQM(Q7hD4q{Ad04FCY!Hgn 
zMYtb+O9LbWDQ~*z7&c2F0&}Wg3B*5|tyDZ9%NM#4E;K@dy5?X2v7NvH5m{AOMi;vV z5vn18mZO}dWzfGmCUDSJp>oYQyrJSh5nTv6qCb`ugxs2CC*dbaGTC&4zt^WrwKNhz z2Y95ur~|l@Ae`|?gl$Gs399BB6U&kx8-RDf&bUb|AYO13Q3jQ3b(6IllA9r~3ve;_ zeK47O*+p3~!=V*G&O8Z)xZD--H0UZI@-S!#spK06ndk~RPMHQ$7&#Lmon>DOwoh7M z?Q+}jH}o=!ed;M~ODjPe+C;L}`Jc%Trih0U%pT;vEr9mV0zU;?8dZ*v@8=v7#%*m# zPK+&hMw9V%&~=g;(oQQ7Y&#nQC1@S)=B5fb;;y2kpS`j)Ert`b$hNQ7NxbU%s(TXe z-EApw`3QXe{RkWP5KUQ zoj?2El2WfYVX-sX9=nsgY*pdVqxUdPCHNt+i3@Ly1SsEC5z80eX>okgK)N^GDAuqY z29EU&k$5m^JyS1d-U_^LW?FFF=oWuZ?aMUBn@As~aopojzK0fWE~5S_3V(w`Od_^l z!0NF)@UvB%t=C>3Uuz@C7|CA@VY=}hrP!R*#*uBm$JS#u4X>Y^H(lHRP@JA>r->>u zGkKRIDPUuHUX@HPcM7+Au+`V>tmO&;D;=eG=(MQkY%`Wj&iAALB$fyxjDjpMH(q+o ze8G;~YHTlZ+Vr!9>-~LpZ?r!Velz#|1sT*acmU`ASh`kL_W;?Zr9|Fr8c+IyCxTc$ zg8@=KM`93D0-ucu@@-xi>1gj`{oC|~oX`Du;^W4?v@jcD8nMq96@>*gGos9m1`(%6 zg=XKtXz4JbW&>j%ZD=r3xHtEjINW-tspAulxI|@iu=?z}$zm9R+lB8%Hn%(PyA3vC zhj)gAgwfMY>~wGYuh`A#uQKw`q(QmfGCSGdUq;WK#uGz_={CKrTqP^Saeio(e!i@Z z&aT!<#&ZH_?W`!{`Ev?tbmS%xgr;_gXQcbsJKt>Mq2%GBBK;87EsJk(d)TW^blV6K zPVf4BRF^M;_vX+H;1$-QCRBu2^d|u@i{%4alD_1YEkm|YPQ3}k1PArQB%1@XwUR(l z%wLXn9|v?II*W{hC>AAJtO_@Qu)USc$ZykFKx_qu_D0HAUU!hcUg2W`e7Kg%5Lh@p zsvl9wP>%xWl-_%yql?8GA|!OtUl$}~d_S7Hn~o)Y^kr*xb9?FiR$DoO3qTz&Q#FS# zUC}U7*2e5aY?1@2RfM{Bn6ibhvfltp*xnO}^!u1Ew#1D$_G$+e5)J*PugJJb2A3zY z`kMsE=^GL%nVZk`&T*o;a8Vp-v3$81LD4|{Qw*Pwr8tWeH`XoQW{wGL&r2SNCnels zlsKb(UQzmdR}2f$4J;ZE$z)MM_;EqxcOttu=^;8)SS!+AP78u0cDA3$Ga9$=1U(69 zL>Blz5oQ8!V0(6%62KJSdovK9A>o51G9>Q7twi^zxqlN_h#JaHsNPXS@bSiRGNXR| zOdSw1U-e@@m4(Q0F|H(UAgiTPpb7Fo+=*-fdogNcv5k4;$3cy$aMPkg-0u}|nDc&G z?t(}>k*MK^p6V{1qnPz8(>Eh6li|%lIXI!UV--90=63|PS=Hayr$@(5b2h8s+oweC z>e#%=4qFWixwdw!lT|iST2?A-T2>*pOG^_k2aB5PN7lAB8m1~0u0CM*FrC{kCjyh~ z_bI3Ozo6gZ`5>zAw(?zl>Fqrqjvuu%QJFk{u`oNj?DeStvZMrb_f9pcTtNHBj%Wie z@A+B_o1k?2d+m1r_XV`sPcg3Gfd~X7^>DeP z+7$@?kMN_=Cv#~gVo@u>QF87tg`{ftj=hBki z{rko2FMqmR?vJyLz)N5I>dV<(x}3M2@!{^bvyJZ0x7$l^$7(zMtL|Z<#&vzHJdr+^!Dm zeH=8$e)#*o9QEU^oqjrRjep32zlvB0uFdmx@-2TJd%uXhb0$KDxSi=;E7kl$_yzk2 
z+@11mi6Xw@#jD`jVwORVz~^-h<+%`nrT?OzsSC0{+|# z6V0x3+h!snG5BD?)N5xfh^5wR?14O!NZ_G`g>CAR1O^3K9N8%+%P`DzU(|TXCzsOa z?LF2fSJOuI;Jr1APX|qlXroWIUeU^iCSoq~;6B7n75TTfm#|O&`ve^v#j<6gkIfpD zs>n-2vgC7no;VJOrb|$qEVX2p&U z#N!M-^+z_Sk8x<2kcEiO06Mn5p64WU>WGPQ^QgOLWG#W?G7Y=1-)$QJpADpp&=;^X zR(mq8F)x!Yw2T+x^$Bg>t9M733b`GC1$Rp@TK1}F_nP!d--=m=ShfpQS*}Mx^o_E* zjU&Rjq1iMD^u2y(@*=2x8&FCZm>;GUtRJ3BntudiDnB1kH2sFkwI3STX+Ri!=Xg5U zP$uIZ9KzO&0+Z|alPD20_A?GM0&D7HLXg%DL7kmEyJ4twV5W{Z_b?8-cl&3z&i->} zp~e}fInq2xz+K9t2k(0%&m>K(4LMAcLD<>#z92Xb-)O651v8*8J zT?=~9(qiSv0&cY5=Gp||1%B|rSOhHNxZ<2BIS@dKT@Bs-dB z8r;Tn!^m_%VMemWp!>vCak;vV0YK+jOxmO&oNDcZ{fk2`9~0UZ7ugES5qB6%9SiAi z13t{B`+hq8+};^X_q#i{*o*sSvO3*MyGfReWFqj7d*BSZOw02ahJ#MY6|F3|x_){D zelQr=T|NngrzH!zWTfLLzJz0WChu-J)QQ#_vMoVhXEt|+!^@bL1>jls;j@A73p92H zy%pB<#LaeXEI&u!BhSFJ7^w|DWwu> zj4Ru$cT>>PsQPxg^NjHOs{nv*P*ZlmEzwn^C?w3x;uv~m_lSC2&|zvp7nP|a|JQts z_W9MO(f2svrH-upm3wzI=_Z;7`!j;C7xbb2s7HF|fYQBvUh^k`y=;dg0eh@?|B<>S zxa|_;Li*wkMUgm}yWA_E_lw<=(U$Lv&*K%GZx;x95}dR{YCzFZ@S}Pa^Luk?9c zHOPi3XqIG6k9nv1iCc2NJ`NOYnsMip_vWrt5_Mdf2ftAt4!nf)?*wu}VbVTI^KGdx zrH9oPG;>P1Qr^WmM@yXd50>lJU5`QkieRlvKncowE;1E zl`ywdg$~L#&l2L>DsG_39vh)afs=UZ$EZuTX=PuoReV!rSY2Ui`|e(mBQS}aY(rLUv^7G(=9bJ5yiKSDl$xj$QLjdY9-TGBt3u9tax z!(?cQ-R4)o@`DFrinj@Av!}WNJOyZPQDHgP!Q94*{Q-ZgS#h-8Avi+#UAxXZs6T{`P2*hzt}sk>IyKs304 zMMNQgw^AH00E*nWMh$=yA;BR8axJBq!!{Q-kesDsAroSg)Z+EB=LW)GU#rp0RIHx& z+%CKBE-5|~j8!U1EpQT>=p=R$D*UU8%_N)*oF)L`FSnsWBM=1bin$psQsSFSVgwl8 z(SQ~5{#CAmohRggQs3{y&QkO;bjuAR`WLEhRb`np!Ij=j!4$4YC#lJvagz0^)T&MD z%3M$&lZkmYYa};`~k^0MVH=@*UUHcy+#`Q zptgzo9;>6&q5XJ>x=#=VJz8_q)BQb!5QAtPp;liv9WBbYENsSN_^vtUo0ile9B``# zi$LNk{{5o%?2LsuziQP2jl!5mI!18BeK|X>X)W-=ZD6<0OuJuT+K;CNCA4&D#%#$P z*cbAwgKswAEi4JqTV6GtLT~D@L#CaPkJfWG0(AZT5vBscDnhAYZylQ9Fuy&9cdtrpX70hg-#`l|-+$(e2qQ#_BzD@N zPT7vl>W)bckr&)N=^@)A)wsd) z3o;6Ru6xuyQs0TYfQ&TMQtw#zT+7nWyV}ACMWbqv`NKS%b}E8o@g{jD*D05Xx-n(G zGem-OfD98(5S*oJHJZ!PhJ$T%X^kOV^(D&?Mmp^*!##k0`RYpxk21W``^h-$9fRph z(3?-#9M1zDtW2;+6*nRJ)VOkr8Zg;)8+Bqsp%+v3l~bC85DH=-*(e)7_*^1P2rNEX 
zA~R?VYB5+4dhHhNOsII+y=U663+Zl0FrJ6ZIl5yvTTxXR(tvb<0txhHwP-tGAZ?c% zv!SqVj114lafuOp60UhZcuNmK$Ykq6e=`BlKzO8;6(D4%Mro^`@SH}$qmDFfJdu%Z zL|e*SQX26BFLpJYo$+@@@T_t>s zVGBzwXkr>O6y8vg$j3|T7|e%J0&VvfotVk4LQsQsaKfmijF*7JD?3hD>-!0``tEaH z90~=t>KE*d#4E2<`#IqNE~L>;giGH$`c=W9P9@6{7LTjs7yP{Qv6>yi4}Ol(b~oMo z3>M~&(^^>r*W$wus?*wI@3d;oSQkryfgkJC_>04xlQ^L{vwBBt(wnhNGLk|eu7N+~ zh!&5>m#P{mT52AEQ5M*bM28nJeoOO|)#ul>NEJOFCWEEoot9fGLF8F4SLd2LH;^Q{ zX&|+NV!>21Y@4FL_ZXw67?#w2Ym6ggnh&V~eW*5$wdHd%!2jNJ3s!8?-7?;=CY;wzovi%*BLXG4{b4G0cXo96gvqGBqn5mBK- z022QMb-mW-@CZ`>fq{~6#uiWhTp-C*_{nc3d?l=pQ|&%Ksz#iJVK2?$or&nhopyQL zeGsyiiOx0z!mW(sKy3Bd)P^hVw}3T$T9@^rT>ccH{mA9J_UJUW(PKgE_`uO+_#sIQ zDsvK#Snc+`wv|;BMWI^=4bfOT?!%NrxuF_%g*6Yx(`H@7xi^#DJ^?y|(xR9F=Ymls zSCThH9R%cgWOAFKp++bnBh?GKU{KFZzp`1-W8Sy@W0ZRmMQxS}eFn=H39m{ik8Kyr zx2A~tGl{Rdfq?JPT4=^egc%Ftt;S;xWZSdlJh|;5(#+BZ_sO%IuLx! zI4z$j6Sgy);kKA`7{FUl)xK4xt{QN&RDTm`BB-OQ_3A)Az+DE;r%#XVry!QP(SOc! z=9TSSh-Fs-|6QS*2$o-V0V;+O3VIhCud#>!P+f^jkk9wL5sLTyi6H#=}YmkFpN^kw0o%p`f-VL-I^PL!`)v{1Hl1>a@vf0-P8LVCgZB;y!AVznJ4 zuu-LX0_vbq3Q}0Q2fARiWMsqpO9(cR-Ip*;T84;nrd>V82!G7^2+6IiV2WJVa5{$E z#TL1CKdh~mJwaP=-^f}{=+cud7*c|(!v|rOPb(Y^Rut@};By}y3;HZBfl@95WmW|$ z`uWF(&+yhj%B)C4td(HV(`@?{JV9W`&T}vPjLr^NB!t%08LFNHiwVJfCrGx`os6q$ zW&-TrdJIPS82-UwgS}%;1jkPJ9E5_A%2gH^s@CpK!`=>gLYKL8$__0+tLnmNt}Swq zI8E5?ikZJ`?oi+J5~N;QuRC8o{nAJXGnU7F(5-n3A3sfj6s3Y~l?-AY2Oyo7mYu>* z;z?-DE8asIiI-?&sDYY=oa0NZ0M_E15CnH`eUPUfI9D>l4Xj-?N#$EynVm(kxfDY^oj}?4>XnS}VXG-!T1=G^?>fnE+ zxD=CfJ#5FG%7=dc4b91E)DAp!4EGLZ3Y_2GJ2e}QbreBd(+B|;f8~L=0VL890xK#O zwY8xfBVS;3X&6V-7Axcljiry8`biSbTSlbC7nz`)C_lj+<{YL6O>T=erpOQnu+jxL zwUmeOEF`{>r!d@0X>)=|;K(a-B4fuCh^%q7mPeDu->58rQ}fZ@N&C_BksQFH1RWDr zh`#9>YFpJu8G~`%r2ndexJrz7cupp9KG%8T|410z_RX3-Zt}D{aCbq67DepNd=WM6 z;nZ5J&e90f*9?Dy>2i8LG1~{Py6w{Sc9cuo(H`w{j11dQM4lYQuuP7OBQAfhocy?0 zaNj`K_c(+(Px+%n;+VO8jfHMD_S`b{XSY%R^h-zcS|N^0w!_yZ-uw#AcpM$kso)G) zZ}nISyl6U2%j1;2^C(Ma0-`vO(T`3|m1?+e86^s()H6jUX4DEdtnEiOaD_jjnmGfq zNqHgauLj=Ama(PyJd-)mLM-NiDh^z{Z{Fca- 
z=$INHI3(e&gqYfTTd5NcT3Mayq8tWn^4N=-Aup|ZQYESng#YbyD$OVHT_&$_!_FH% z7F7vC-j=7`w1nsV!W(*lH6mC z1Co?7`Bmc5=?-C=4#(S2Ey9ur0Z_|(TSTd!8BH)C#$HHWvv6}}B@%>DgfCKKSfGxWs5a=zj#u>ON#Ag&^!f^E4>5=^paIsEAuK}-fBU1wzd?-w+xn z`Md}0Wt6ccdr5B22bW8l3+woy^~Op9mQ#8hLTwV#(aaTe9)WMiL<*Ib$qTHb!tbqa z3};uDUm4MyI&K|Saj~QM**Qa+DaGCORM25@P$Ntl5C!4N8d*SuVu4oIntNcp`mf1x zd!+^eTTRbdj zp`0Os7`~E-JfGb(Zxkp2_d3sWx$yUq^oEzgIDse=(L&Nl9LlxL2XxNCNh6x#zu>HJ z5C9gyM1Y8xpxXe6G>*`Z!4@zzcNRj1srlM5rg4kz(+8L7uA;H4WX(|U6*b!ZIcmMIDf)Tg)aIYM@tCTjN)a>|xl$YBl& zY{;ieCg@f50&f+HC_^>2ZAb_NzJD5j_T(;ipD?DgA7*0$(47-8*^fWk=uv1s2@Lu; zGzvKqlv5Wm31T?{&$}$kYXFD80R+#;!-eJna#ql)DBMF?77KNxdKIbH*VroODut{S ztAM|@dIo!)6vAbniYo&F87aV{Q5F|V0wr0GY1%iuK#7`(8~{N=;41GPeLY{e&HdOj z<5Oy9Y9bgSq!D)MgneYq+v%*S10!J+@aW~5@E8ov`wlmpwPbaP5jYUZ%-6K}ONFeQ zo4#IIKY_`4%tfhKMX|R5@+Pbw8#QPo!TXHf6$CNH)Fsn$yztz3b-$=CEbRoIGSs<` ztn+p&bgGS>6A2(H_8_p60EMZ&?L57~uFCQ)kbs$y4kFH0WlwLN9 zy!;0j6%SBsiD;gSaWQL!3=C`9ysd0cgV7;8Ob(kyLzZg#OhcyTf$&H-DaU!{Kn(u+ z)9mI>E=H-LVVtrzoAUzz9WhB9x755$y}QZ=pB_@kSKppgRM2^|AnayK_6UQa*|(#x3kCmx z_;~Vj@jAwX%#h`05r1ATHE9@|SJ~JKECc!|X`YVKm=Wr+oXh14rWc?jRLQ5Aj{0BHkXZqKPiXvu`U!$U$KgVOnP z--LKUC+^M4k2@^RE|if`rLvg=+I=XJ?`@%HnGqtGjaf|XEC5rbi;sDM0|2_xI-Ey2 zg{voJ4fE_USbU6bk5^^qb@+o%FRMOGN&SgsK5<4OYDVT!5IYNjOGPg_2eX21+o6L% zi3 zsg7pzS=aD`p`DNfT#;CoNn%xeE8b?w5Ma|}7`WqCnnFBdxQ=(3ruZLKA=ZKcGbgjOWqnK)vx^Y@MhT`gY((1JWb0=>m(NE4jr zivXGJV=|XQ0$yzGh1{+cPpkVi5JJ>5tx@G!IDaM^VcGJSG}Bv>Pk;spw_IjjPT%*8(CfXL)3C1=OS?0LYRhR{qg?>h+>b=Qo=7?M(KkM)gT%0WXAp#4KyfNm8 z{<)xNJktX1sa0u7A@h~Y2QhuNn$IYSoi9eK2^MPzsvWj-$ogyB2YYHVhMUl(ZJk#r z9|aXj#>BKN*eb;y-iTM8nz6rm_|200&P5UdA(RR zLcF-ax|2Jkcg#f=j&bjo@;qOh)+$Zm`bs<@iw6}~?iBJqm7OP9Q&bqOprPjhrdoRZUS9p3^H|?oOBEL9V3M!)ZW4l;PrDWLW;1Ge0@STs=!-* z6u-daIotqd(*S*+P)G2~h-4wkytXxzVJBPD;_EbukH-WW{I1vLa9d057B|lU=l)x< zG<=YRg4VZFJDR$ZSFWJR zloy@rQS#d1@>27yt?54d_5i&@W-~PUomoGzIGb-kYfRloW^~9v zViVc*XYRRFGwiIrQB&|*K(v|2EIUttmDG3v4>@>@@66Q9%X}-KKkc$h7(!hT92$N2ZU2b?Eu_HyJ?RH`qo-Ep 
z0M5KQHpcjS2YWo7S1Uoc>j;W0Y5zF%48UH8rnkX*sTF;yLo^d@y)CP1=NW5DfAwgE zFNq#McP|{t*eEsy{qNV0tr!?#@;pkEBFP5$8Y((%aa3k)q34H9tf=Orx6w{fPT!I4k1}C z9tnPsG;k(jMN~(i=0YlAfK(DLvgHlx2Z#4#SUNzV(#e(2=w)>px*ZU3bQT}N{M?>T!rY)_AvHo@!yCT} zRYh-xF~}%!<}82F&Y^5I(81%AbiV~PiaZyj{c-p}fA-1+)Vq+V?*?&xGMmgbl*7)k zA7(nDw=S}Cb?F44rfY&3ds*w@OEde+-;=DK>rsUY*kQYff zwEapP(OsD8b5UT5d8ijdbLTNd>Ta8$Y1wlaylQ(LKe>-US&_5CM21abTyE^J%j^KN zmGGsg3QV>ZMqw&G0K=*=E9( zlEg?pNO#auHMI5FC-n%slH$Ej!D<^Oo(|+3bsQjOf}})@{1;a%)Qw2QS@C0FWF@+n z%%f)10(y)(ggayLOfe$AnyxBoeu{-g*we@rr`gZWvj9G4@KQI-;C03;D2Pl6U5Kq*O_qd1{3N*f{$hQ@1B7B?og(&3yA9#S%sxL z5c9FRk3b($TI7)l=Z$i4m=HHF8PN=BhXoLNU zsR67ZBQUEGd7*A`_b0$kM~YTjs}8k$FBXk$-tZeF zltuGH?ITm1ouhwr3+=dJYAl+AMu_!-H*bH+0MoGVZ2^_p*Qi9JzIZJU{eZzXgXOjjbtKZzT8~`kKpr86 zrtGky(?zazz5G6%qpHTypj#<42l|XHC4`&&Lxj&zhTIlT9%+EWHhJ;jsf2*)+=6*- zWxK}wkyNQ4aJB>zg$vitl)e>!Ll!5kl$2e{2Feyd;`sFq=EkRX87N3worG!8tTNU^ zI~WnoJHc^OOuFS$u{tGy)+(`AKBfVZ{n9+8oq-BnC>W@ST|AU7(@|Fz6cbP)e@aJB zSfW+-BUwkGQ7far;oH9~ zZJ=k^R?-|r!0)(y;KpPPM*Ce1*TgER)?+rt2=DLgl#Cnj7_%SWKt=VjfliI(Yb7~?`(Z?IaLo#4}ls%))T1*`EjJ*s;srtY-P6)EC%VC(r zou!&-1$K8#fmas?-W%=|1F0_0%uL%}?4-$Z3IMh5$Sk6EDca_7mmGGG3}W=~^%I!O z-=%?n)<`us&Qa(oHK#DX`MXunV=Q*(dX-2358y8HpuHbb$BbG zNiBO`NfPIKur*{^efc>+L7TUpYFqu#NsAU&ztUJ23W97D`I~YNR<86u>6HjjRXoJ2 zxskr4jvWxUF_8{=&y~5&=My`t=yO&0X20P5&{0K_g0d+QXS6B~aFIzx# zI*2(arVZ;iq+gWbqYi?Q&s0IvFWjWZ@?L z_P3k$Tej(TdI;7b%$d=3Ix(0@d9Xjs8Z2ML2DeB2HJH zkczJEpY5rwOGyYLiBcmRsa?}78ILXV2vd3( ztm0}jY>B_!!x{v zLI)s)D65hIPFEL|R74#^bZWd192@!K8kLuo(CqIuQteWvx&3)pm9BxVL>6ll&|sN0 z`ve=L!a(Bc=}(!>VuB=PM!FhXZhZ}gX8s)n$r|t zgQel^aF(B(DNuCi;sizldFbPs1A8AQA4NF-jFK?~Rk}xbo}LGy*LX=w zcvgn9qwf~DqwWW~FgTtfoZ*pt+fOG!v!w3Ma(k^sG%&)q=7|v%7?G0dzPJCD`PfhU zIKi{QCLe>1KS)qLA%Z8?m@bg46j8|Pls>Y=D2(^);v`xrr({v9{lkLX0cz8uu$d=f zRN(7j0qPi5Z!gOPT?XzGs7tr7Ep9Nr@}M$JPn(+*f3(>5uRDqU7Zx2{EJ4Y)+J3FgCL1fVhGIIU&Fd>)XZ=ndTCN__N zS_`g*E2ORh$hl%Dm~PQ2q$&xs*Ov}$45S18iAG{RZ(Xtpqdyb&fTK_~iHuEZhz*+!$Y`x>vX<69oO}5YBmJZRihP zzo#JeWVeRd`tw2N=6BeIV^8H-@i4fd|}4E40M%I^Pv%7(=-z 
z5-nzrmF4+!xmZL>Irxl-0$Dq4or+QA&ygFBw>?8;B4ws_LN z>M1i(?J37+yI9AeMg@K^T<%7=7Xbs~n09pG9lvyw46Up(rGXXE4o*{wSlid`cA!ol z)|SPKAjzA2=KuBUu&+0MR;yuLoHK}|;PjK2ad==a#eTA9tcBl#M6&Z*ikxk25RWaF#rt(Wugq?ShX^2QwGFtjhM^I%c*F;`Tl}jZ_-pHVa-a7_O2bWTIt`_&7%0iHBGhM!?YarNadD zLOrxmLxyE_ol4T~s-7k@uBo~DIJ^@)IU}`T1~Xtv(_-fFZzdcl;;p4PzoBbvChe)>HPdH*wPWV@9fy#$|yRP(mI_yZVc?n3U`d zq73x6O@0vX0M9@4FyE!*1YAAjQ0M~*_(x?*aGpPZxw?14h*DWakOcnB@Pf!<+ep@s>}#Sh)N-hSL$*; z@l&*gL{&LpSw8Y(GY-5i401hi>uI`(E(q5YU_fK&gpy)yxpBny=9djA?^cOfzWenf zg&z}I3Q>8csggh0pO}CYmM`biSPF03@rkyu>BVpdD6u(}2_7>=<3Rdp%-&iU2;tMlLd$o>;%U~m6tB>od!o(zF2^MyA4LL>gg z=wD|W{O8wyF*UMtH2L4)@FO8QWM3WOzTkhx{-5?Yb3ow#gu6I7+u8h^!2d4cFT{Vf zKi8oF@|V5&|1TxI?*A}Q|GdtBTCzWk{yx!=|AhSyyY^2y|BRUVHyzXXzu5ne_?drF z_-D}izbTM}{7vEigs}hrzxEs7zo7q0nDXC8$Ujr2{F@_&-+#dVoj~QEr~J=F(!U9q zcl<@*KgIU{I%&8+$iLbj4SW8r;Xhy}a}yKi|2ejEAUE64yZ`_)P=Eo@{|ouI)=K^d e_Ae5D<$sZv0{yc-_!`fM0N`K!g!=w``~LuC&45S% diff --git a/data/category2labels.json b/data/category2labels.json deleted file mode 100644 index e79fc90..0000000 --- a/data/category2labels.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "A": [ - "主题活动", - "党性学习", - "十九大", - "廉政教育", - "政策实践", - "相关会议", - "重要讲话" - ], - "B": [ - "听取工作汇报", - "换届选举", - "提升基层工作", - "科普人才队伍建设", - "群团工作会议" - ], - "C": [ - "助推新农村文化建设", - "城区精神文明共建", - "实践科技与科普服务平台", - "少数民族村的保护与发展", - "展览献爱心", - "文明单位考评", - "道德讲堂" - ], - "D":[ - "优秀人才评选", - "先进示范", - "双亮双比活动", - "授予荣誉称号", - "自制教具评选", - "表彰", - "评审" - ], - "E":[ - "三长制", - "创业青年座谈会", - "征求意见和建议", - "最美科技工作者", - "科技服务", - "科技者活动日", - "竞赛" - ], - "F":[ - "创新", - "助农", - "发展战略", - "对接发展", - "科技服务平台", - "科技节", - "经济发展新模式" - ], - "G":[ - "三下乡", - "科学素质小组会议", - "科技培训", - "科普服务", - "科普活动", - "科学普及" - ], - "H":[ - "座谈会", - "服务农业", - "服务群众", - "社区共建", - "走访调研调查", - "百汇联百村" - ], - "I":[ - "学会活动" - ] -} diff --git a/data/labels.csv b/data/labels.csv deleted file mode 100644 index 71fe314..0000000 --- a/data/labels.csv +++ /dev/null @@ -1,84 +0,0 @@ -A/主题活动 -A/代表大会 -A/传达学习 -A/党课党会 -A/十九大 -A/学习研讨 -A/工作会议 
-A/年度民主生活会 -A/廉政教育组织党员干部参观 -A/开展主题党日活动 -A/报告会 -A/理论学习 -A/组织党员干部参观 -A/组织党员干部观看红色电影 -A/组织学习 -A/组织生活会 -A/贯彻学习 -B/会员大会 -B/全委扩大会议 -B/听取工作汇报 -B/常委会 -B/换届 -B/提升科协基层组织工作 -B/科普人才队伍建设 -B/群团工作会议 -B/选举 -C/参与城区精神文明共建 -C/展览献爱心 -C/志愿 -C/文明单位考评 -C/文明实践科技与科普服务平台 -C/新时代文明实践科技与科普服务平台 -C/道德讲堂 -D/优秀人才评选 -D/先进 -D/征集 -D/授予荣誉称号 -D/示范 -D/表彰 -D/评审 -D/评选 -E/乡镇街道三长制 -E/会员日活动 -E/全国科技工作者日 -E/征求意见和建议 -E/成立科协组织 -E/最美科技工作者 -E/竞赛 -E/高层次人才联系服务 -F/创新驱动助力工程 -F/创新驱动助力工程知识与技能普及培训 -F/培训知识与技能普及培训 -F/学会创新驱动服务站 -F/对接会 -F/对接项目 -F/工作站 -F/推广 -F/服务站 -F/科技创新 -F/科技节 -F/论坛 -F/调研指导 -G/专家乡村学堂讲科普 -G/全民科学素质工作领导小组会议 -G/知识与技能普及培训 -G/知识宣传 -G/知识讲座 -G/科学普及 -G/科技活动周 -G/科技辅导员培训 -G/科技馆 -G/科普基地建设 -G/科普志愿活动 -H/帮扶 -H/座谈会 -H/慰问 -H/扶贫 -H/灾后农业生产自救 -H/知识宣传 -H/精准扶贫 -H/组织专家义诊 -H/脱贫 -H/调查 -H/走访调研 diff --git a/data/train.csv b/data/train.csv deleted file mode 100644 index 5e57500..0000000 --- a/data/train.csv +++ /dev/null @@ -1,343 +0,0 @@ -label|,|ques -A/代表大会|,|肥西县柿树岗乡科协第三次代表大会召开 -A/代表大会|,|肥西县桃花镇召开科协第三次代表大会 -A/主题活动|,|肥西县三河镇举办65”世界环境日主题宣传活动 -A/开展主题党日活动|,|省科协组织机关退离休干部开展两岸情缘”主题党日活动 -A/传达学习,A/十九大|,|各党支部传达学习党的十九大精神进展情况之九 -A/工作会议|,|肥东县召开2019年全县科普工作会议 -A/主题活动|,|省医学保健养生研究会党委组织开展不忘初心 牢记使命”主题系列活动 -A/开展主题党日活动|,|肥东县科协开展不忘初心牢记使命”七一主题党日活动 -A/工作会议|,|肥东县召开2018年全县科普工作会议 -A/贯彻学习|,|肥东县科协贯彻落实全县党建工作会议精神 -A/十九大,A/贯彻学习|,|肥东县科协多措并举学习贯彻党的十九大精神 -A/廉政教育组织党员干部参观|,|肥东县科协组织党员干部参观省党风廉政教育展 -A/组织生活会|,|肥东县科协召开讲重做”专题警示教育民主生活会暨组织生活会 -A/工作会议|,|肥东县科协工作会议召开 -A/主题活动|,|王洵赴合肥开展不忘初心牢记使命”主题教育专题调研活动 -A/传达学习,A/十九大|,|各党支部传达学习党的十九大精神进展情况之四 -A/组织学习|,|机关第三党支部组织开展革命传统教育重温入党誓词 -A/学习研讨|,|2016化石保护研讨会在合肥召开 -A/开展主题党日活动|,|庐江县科协党支部开展科普进社区主题党日活动 -A/代表大会|,|庐江县科学技术协会召开第五次代表大会 -A/报告会,A/理论学习|,|庐江县经济发展与干部学习论坛科普报告会举办 -A/报告会|,|庐江县举办科普报告会 -A/主题活动|,|长丰县科技馆开展清明节环保科普主题系列活动 -A/开展主题党日活动|,|缅怀先烈 牢记使命 长丰县科协赴焦裕禄纪念馆红旗渠开展主题党日活动 -A/主题活动|,|长丰县科协参加宣传贯彻志愿服务条例主题实践暨圆梦微心愿”活动 -A/十九大|,|合肥市科协开展纪念建党96周年和喜迎十九大”系列活动 -A/主题活动|,|省科协食品安全主题日活动走进长丰县岗集镇 -A/党课党会|,|王海彦同志为长丰县岗集镇龙岗社区党员讲党课 -A/主题活动|,|长丰县岗集镇举办食品安全宣传主题演讲比赛 -A/工作会议|,|长丰县岗集镇科协工作会议召开 -A/主题活动|,|合肥市庐阳区防震减灾”主题科普嘉年华活动举办 -A/工作会议|,|合肥市蜀山区科协召开2017年科协工作会议 
-A/工作会议|,|合肥市蜀山区科协召开社区科普大学教学工作会议 -A/代表大会|,|合肥市包河区科协第二次代表大会召开 -A/报告会|,|怀远县科协举办农村生态环境与健康生活科普报告会 -A/工作会议|,|固镇县召开2019年全民科学素质工作会议 -A/代表大会|,|五河县科协召开第七次代表大会 -A/工作会议|,|蚌埠市龙子湖区科普工作会议召开 -A/代表大会|,|蚌埠市蚌山区科学技术协会第二次代表大会召开 -A/代表大会|,|桐城市科协第四次代表大会召开 -A/代表大会|,|桐城市科学技术协会召开第四次代表大会 -A/十九大,A/贯彻学习|,|桐城市科协围绕提升三性” 学习贯彻党的十九大精神 -A/十九大,A/贯彻学习|,|桐城市科协召开学习贯彻党的十九大精神专题会 -A/十九大|,|桐城市科协组织收看党的十九大开幕式 -A/组织党员干部观看红色电影|,|桐城市科协组织机关全体党员干部观看榜样 -A/传达学习|,|桐城市科协传达学习省科协九届六次全委会议精神 -A/主题活动|,|宿松县开展低碳生活绿色出行”主题宣传活动 -A/代表大会|,|宿松县科协第三次代表大会召开 -A/年度民主生活会|,|怀宁县科协党组召开讲重作”专题警示教育民主生活会 -A/组织党员干部参观|,|怀宁县科协组织党员科技工作者服务团开展听民声送技术活动 -B/会员大会|,|庐江县举办茶叶协会二届二次会员大会暨庐江县茶叶公共品牌培训会 -B/全委扩大会议|,|庐江县科协四届六次全委会议召开 -B/全委扩大会议|,|长丰县科协召开三届二次全委会议 -B/全委扩大会议|,|怀远县科协召开五届三次全委会议 -B/全委扩大会议|,|五河县科协七届三次全委会议召开 -B/全委扩大会议|,|桐城市科协召开四届二次全委会暨全民科学素质工作会议 -B/换届,B/选举|,|桐城市养猪协会召开第三届换届选举大会 -B/全委扩大会议|,|桐城市科协召开三届六次全委会议 -B/全委扩大会议|,|安庆市大观区科协三届二次全委会议召开 -B/全委扩大会议|,|安庆市迎江区科协学习贯彻省科协九届八次全委会等会议精神 -B/提升科协基层组织工作|,|铜陵市义安区全面完成科协基层组织31”吸纳工作 -B/全委扩大会议|,|铜陵市郊区科协召开四届二次全委会 -B/会员大会|,|黄山市休宁县老科协第二次会员大会召开 -B/常委会,B/听取工作汇报|,|中共休宁县委常委会听取科协工作汇报 -B/全委扩大会议|,|休宁县科协召开五届六次全委扩大会议 -B/换届|,|黄山市祁门县抓好科协组织换届工作三长”进入基层科协履职 -B/全委扩大会议|,|祁门县科协六届五次全委会议召开 -B/全委扩大会议|,|祁门县科协召开六届四次全委会议 -B/全委扩大会议|,|黟县科协七届二次全委扩大会议暨2019年县全民科学素质工作会议召开 -B/全委扩大会议|,|黟县科协学习贯彻省科协九届九次全委会议精神 -B/全委扩大会议|,|黄山市黄山区科协五届五次全委扩大会议暨2019年全区乡镇科协工作会议召开 -B/听取工作汇报|,|黄山区召开群团改革工作汇报会 -B/全委扩大会议|,|黄山市黄山区科协学习贯彻省科协九届九次全委会议精神 -B/全委扩大会议|,|黄山市徽州区科协四届六次全委会议召开 -B/全委扩大会议|,|黄山市徽州区科协四届五次全委会议召开 -B/科普人才队伍建设|,|黄山市徽州区科协四措并举”抓人才队伍建设 -B/全委扩大会议|,|黄山市徽州区科协四届四次全委会议召开 -B/换届|,|黄山市徽州区圆满完成乡镇科协换届工作 -B/全委扩大会议|,|石台县科协四届二次全委会议召开 -B/全委扩大会议|,|广德县科协学习贯彻省科协九届六次全委会议精神 -B/全委扩大会议|,|绩溪县科协召开五届二次全委会议 -B/全委扩大会议|,|霍邱县科协八届三次全委会议召开 -B/提升科协基层组织工作|,|六安市裕安区召开提升科协基层组织力31”工作推进会 -B/常委会,B/听取工作汇报|,|宿州市埇桥区委常委会专题听取区科协工作汇报 -B/换届|,|宿松县召开科协系统深化改革和县乡科协换届工作部署会 -B/全委扩大会议|,|濉溪县科协召开四届九次全委扩大会议 -B/群团工作会议|,|全椒县科协认真贯彻落实全市群团工作者培训班会议精神 -B/全委扩大会议|,|来安县科协召开四届二次全委扩大会议 -B/群团工作会议|,|定远县科协学习贯彻省委群团工作会议精神 -C/文明实践科技与科普服务平台|,|安徽省科协 安徽省文明办关于开展新时代文明实践中心科技志愿服务工作的通知 -C/新时代文明实践科技与科普服务平台,C/志愿|,|宿松县新时代文明实践中心科技志愿服务队开展水源垃圾清理活动 -C/展览献爱心|,|太湖县中学生参加 
2017年参观科技展览有奖征文暨科技夏令营”获佳绩 -C/展览献爱心|,|黄山市屯溪区党建引领科普爱心漂流书屋”活动启动 -C/参与城区精神文明共建|,|黄山市徽州区科协助力全国文明城市创建工作 -C/文明实践科技与科普服务平台|,|广德县科协召开文明实践科技与科普服务平台推进会 -C/参与城区精神文明共建|,|广德县科协赴联点共建社区开展文明创建工作 -C/文明实践科技与科普服务平台|,|金寨县科协开展新时代文明实践科学传播活动 -C/文明单位考评|,|六安市金寨县科协积极开展文明单位志愿服务进小区活动 -C/道德讲堂|,|来安县科协开展道德讲堂活动 -C/参与城区精神文明共建|,|定远县科协发挥科协优势助推社区文明创建 -D/表彰|,|肥西县科协召开科普工作表彰暨全县科协工作座谈会 -D/示范|,|关于20182022年度安徽省示范农村专业技术协会认定名单的通知 -D/示范|,|20182022年度安徽省示范农村专业技术协会拟认定名单公示 -D/优秀人才评选|,|肥西县第三届完成青少年科技创新县长奖及优秀科技辅导员评选 -D/授予荣誉称号|,|合肥市科协荣获2017年全国科普日活动优秀组织单位 -D/表彰,D/优秀人才评选|,|合肥市预防医学会开展结核病防治志愿者优秀个人与集体评选表彰活动 -D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 -D/先进|,|坚定信仰 乐于奉献──记省科协离退休老干部支部书记贾轩伟先进事迹 -D/授予荣誉称号|,|安徽省科协获得全国农民科学素质网络知识竞答省级优秀组织单位”称号 -D/示范|,|关于20172021年度安徽省科普示范社区认定名单的通知 -D/示范|,|肥东县发挥基层科普行动计划”项目示范带动作用 -D/示范|,|我省18个县市区被命名为首批20162020年度全国科普示范县市区 -D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 -D/示范|,|关于20172021年度安徽省示范农村专业技术协会认定名单的通知 -D/表彰|,|庐江县科协表彰2016年优秀科技工作者 -D/表彰|,|我省20家单位2015年全国科普日”活动受到中国科协表彰 -D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 -D/授予荣誉称号|,|长丰县科协科普宣传惠民生项目荣获2018年度长丰县优秀志愿服务项目”称号 -D/表彰|,|长丰县召开第二届青少年科技创新县长奖表彰大会 -D/表彰|,|长丰县举办首届青少年科技创新县长奖表彰会 -D/表彰|,|合肥市瑶海区举行青少年科技创新区长奖表彰大会 -D/示范|,|合肥市庐阳区科普示范创建工作喜获丰收 -D/征集|,|关于转发中国科协改革工作办公室关于开展礼赞新中国追梦新时代”改革创新案例征集活动的通知的通知 -D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 -D/表彰|,|怀远县科协系统表彰会召开 -D/示范|,|我省18个县市区被命名为首批20162020年度全国科普示范县市区 -D/示范|,|关于命名20162020年度安徽省科普示范县市区的决定 -D/先进,D/授予荣誉称号|,|怀宁县科协纪检组获纪检监察工作先进集体”称号 -D/表彰|,|歙县第二届青少年科技创新奖表彰会召开 -D/优秀人才评选|,|黄山区全面启动首届百名优秀人才评选活动 -D/优秀人才评选,D/评选|,|黄山区全面启动首届百名优秀人才评选活动 -D/表彰|,|黄山市黄山区5名基层一线科技工作者获五一”表彰 -D/优秀人才评选|,|黄山市徽州区科协委员桂利权获评黄山市第三批专业技术拔尖人才 -D/表彰|,|黄山市徽州区表彰第三届青年科技奖获奖者 -D/评审|,|宁国市举办第十五届青少年科技创新大赛作品评审会 -D/评审|,|泾县召开青少年科技创新大赛2018年度表彰暨2019年度动员会 -D/表彰|,|泾县2016年度青少年科技创新大赛表彰暨2017年度动员大会召开 -D/评审|,|六安市金寨县开展2017年基层科普行动计划”项目评审 -D/示范|,|金寨县开展创建全省科普示范县工作专项督查 -D/表彰|,|六安市金安区举办第二届青少年科技创新大赛表彰暨第三届青少年科技创新大赛启动式 -D/表彰|,|马鞍山市花山区优秀科技工作者表彰会召开 -D/表彰|,|和县科协社会组织党建工作获表彰 -D/示范|,|和县科协组织召开全县水稻麦茬免耕直播千亩示范现场会 -D/示范|,|当涂县召开创建省科普示范县工作调度会 -D/示范|,|濉溪县科协举办优秀学术论文颁奖农村科普示范基地授牌仪式 -D/表彰|,|颍上县召开2015年全国农民科学素质网络知识竞赛总结表彰会 
-D/表彰|,|天长市举办第五届青少年科技创新市长奖”表彰大会 -D/表彰|,|天长市委市政府表彰第二届千秋英才奖” -D/示范|,|天长市新增五所科普示范学校” -D/示范|,|天长市科普示范基地首家农业院士工作站挂牌成立 -D/示范|,|明光市开展防震减灾科普示范学校评选活动 -D/表彰,D/优秀人才评选,D/先进|,|全椒县评选表彰基层科普行动计划”全民科学素质工作”先进集体和先进个人 -D/先进|,|定远县科协获评全县六五”普法依法治理工作先进单位 -E/竞赛|,|肥西县首届青少年信息学竞赛圆满结束 -E/竞赛|,|肥东县举办第二届青少年机器人竞赛 -E/竞赛|,|肥东县召开2015年全国科普日暨农民科学素质网络竞赛活动动员会 -E/竞赛|,|巢湖市举办第十六届少儿智力七巧科技”系列竞赛活动 -E/竞赛|,|我省代表队荣获第十七届全国中小学生电脑制作活动机器人BOTBALL竞赛冠军 -E/全国科技工作者日|,|庐江县老科技工作者日诗歌朗诵会举办 -E/高层次人才联系服务|,|长丰县创新创业高层次人才协会成立 -E/竞赛|,|庐阳区举办第四届青少年机器人竞赛活动 -E/竞赛|,|合肥市庐阳区开展首届防震减灾知识微竞赛 -E/全国科技工作者日|,|怀远县科协召开全国科技工作者日”暨科学技术交流会 -E/征求意见和建议|,|安徽省科协集中深入19个县市区征求意见建议 -E/全国科技工作者日|,|桐城市突出五大主题部署全国科技工作者日”庆祝活动 -E/最美科技工作者|,|桐城市开展最美科技工作者”学习宣传活动 -E/成立科协组织|,|桐城师范高等专科学校成立科协 -E/乡镇街道三长制|,|黄山市休宁县科协推进三长制”落实加强基层组织建设 -E/全国科技工作者日|,|祁门县科协系列活动庆祝全国科技工作者日 -E/全国科技工作者日|,|黟县科协开展全国科技工作者日”系列活动之科技工作者回馈社会 -E/最美科技工作者|,|黄山区科协四个强化”抓实最美科技工作者”学习宣传 -E/竞赛|,|2019年东至县中学生气象科普作品创作竞赛圆满结束 -E/全国科技工作者日|,|绩溪县科协多举措开展全国科技工作者日”活动 -E/竞赛|,|霍山县科协举办全民科学素质知识竞赛 -E/征求意见和建议|,|六安市金寨县出台加强全县青少年科技教育工作的意见 -E/竞赛|,|六安市裕安区科协开展全国农民科学素质知识竞赛 -E/全国科技工作者日|,|灵璧县科协系列活动庆祝全国科技工作者日” -E/全国科技工作者日|,|宿州市埇桥区科协系列活动庆祝全国科技工作者日” -E/竞赛|,|马鞍山市花山区举办2016年七巧科技竞赛 -E/全国科技工作者日|,|和县举办第三个全国科技工作者日系列活动 -E/会员日活动|,|濉溪县科协开展中国科协会员日活动 -E/全国科技工作者日|,|阜南县科协全国科技工作者日”寄语征集活动圆满结束 -E/全国科技工作者日|,|阜阳市临泉县科协开展新时代首个全国科技工作者日”系列活动 -E/竞赛|,|界首市科协召开全市公民科学素质网络知识竞赛”动员部署会 -E/会员日活动|,|颍上县科协开展中国科协会员日”活动 -E/竞赛|,|颍州区召开全国农民科学素质和省全民科学素质网络知识竞赛推进会 -E/竞赛|,|明光市科协组织开展农民科学素质网络知识竞赛 -E/竞赛|,|全椒县举办2019年全民科学素质知识竞赛 -E/竞赛|,|全国青少年航模竞赛安徽赛区在凤阳县闭幕 -E/征求意见和建议|,|定远县科协召开科协系统深化改革实施方案征求意见座谈会 -E/征求意见和建议|,|定远县科协学习贯彻关于加强和改进党的群团工作的意见 -F/科技创新|,|肥西县召开第四届青少年科技创新县长奖颁奖大会 -F/科技节|,|肥西县召开第三届科技节暨肥西县第七届青少年航空航天航海车辆建筑模型锦标赛 -F/科技创新|,|肥西县召开第三届青少年科技创新县长奖颁奖大会 -F/科技创新|,|肥西县开展第八届青少年科技创新大赛科幻画评比 -F/科技节|,|肥西县第二届青少年科技节暨第六届青少年航空航天航海车辆建筑模型锦标赛举办 -F/科技创新|,|肥西县第二届青少年科技创新县长奖颁奖会召开 -F/工作站|,|合肥市农学会在肥西金牛蚕桑合作社设立专家工作站 -F/科技节|,|肥西县首届青少年科技节开幕 -F/科技创新|,|肥西县举办首届青少年科技创新县长奖颁奖会 -F/科技创新|,|肥东县召开第二届青少年科技创新县长奖颁奖大会 -F/科技节|,|肥东桥头集学校举办首届环保科技节活动 -F/科技创新|,|肥东县举办首届青少年科技创新大赛 -F/调研指导|,|肥东县科协检查指导白龙镇科协工作 -F/调研指导|,|中国老科协调研组调研指导肥东县老科协工作 
-F/学会创新驱动服务站|,|巢湖市青少年创客科技教育协会成立 -F/论坛|,|省科协召开皖台科技论坛项目对接交流会 -F/科技创新|,|巢湖市科协举办灯塔社区青少年科幻画比赛 -F/科技创新|,|庐江县第十二届青少年科技创新大赛科幻画类作品评选结束 -F/科技创新|,|庐江县第五届青少年科技创新县长奖暨第十一届青少年科技创新大赛颁奖大会举行 -F/科技创新|,|庐江县第四届青少年科技创新县长奖暨第十届青少年科技创新大赛颁奖会召开 -F/科技创新|,|庐江县科协举办2016年度科普知识科技创新进校园”巡展活动 -F/论坛|,|2016中国·安徽健康产业高峰论坛举办 -F/科技创新|,|庐江县开展科普知识科技创新进校园”巡展活动 -F/科技创新|,|长丰县科技馆成为一中学生科技创新活动基地 -F/培训知识与技能普及培训|,|长丰县科技馆开展第四期创客培训班 -F/推广|,|长丰县瓜蒌种植协会开展技术推广培训 -F/科技创新|,|感受科技创新的力量 长丰县科技馆机器人亮相芜湖科博会 -F/论坛|,|长丰县造甲乡龙虾协举办龙虾产业与乡村旅游发展论坛” -F/调研指导|,|长丰县领导深入县科协调研指导工作 -F/科技创新|,|合肥市瑶海区颁发第三届青少年科技创新区长奖 -F/科技创新|,|合肥市庐阳区首届青少年科技创新区长奖颁奖会召开 -F/科技创新|,|合肥市蜀山区第九届青少年科技创新区长奖颁奖会召开 -F/科技创新|,|合肥市蜀山区举行第八届青少年科技创新区长奖颁奖仪式 -F/创新驱动助力工程|,|省科协组织专家赴芜湖县开展中国科协创新驱动助力工程示范项目实施工作 -F/培训知识与技能普及培训|,|芜湖市科协举办2017年全市农技协转型升级培训班 -F/对接项目|,|中国可持续发展研究会暨中国生物多样性保护与绿色发展基金会到芜湖开展创新驱动助力对接活动 -F/创新驱动助力工程知识与技能普及培训|,|安徽省科协创新驱动助力工程推进会 暨学会业务培训会在芜湖召开 -F/服务站|,|中国可持续发展研究会南陵服务站”揭牌仪式举行 -F/科技创新|,|无为县举行第八届青少年科技创新大赛开幕式暨第三届青少年科技创新县长奖颁奖典礼 -F/论坛|,|芜湖市镜湖区成功举办第二届合芜蚌”青少年机器人邀请赛暨首届人工智能+教育”高峰论坛 -F/对接会|,|凤台-石台科协系统精准扶贫对接会召开 -F/科技创新|,|蚌埠市龙子湖区首届区青少年科技创新奖揭晓 -F/科技创新|,|宿松县召开青少年科技创新工作推进会 -F/科技创新|,|宿松县召开科技创新智库建设工作推进会 -F/调研指导|,|太湖县科协深入乡镇督查指导全民科学素质纲要考核工作 -F/科技创新|,|岳西县开展2018年青少年科技创新大赛作品评选 -F/科技节|,|迎江区四照园小学校园科技节赋予六一”新内涵 -F/调研指导|,|歙县科协深入雄村学校指导开展2019年科学调查体验活动 -F/科技创新|,|祁门县举办邦耀电子杯”中小学生科技创新大赛 -F/调研指导|,|祁门县科协组织农技人员走进果园指导果农管理果树 -F/调研指导|,|祁门县塔坊镇科协积极组织农技人员深入田间地头指导农户进行油菜安全越冬防冻管理 -F/调研指导|,|祁门县组织科技工作队下乡进村指导农民抗灾自救 -F/科技创新|,|祁门县2016年中小学生科技创新大赛举办 -F/科技创新|,|黄山市屯溪区2019年青少年科技创新大赛落下帷幕 -F/科技节|,|黄山市黄山区举行2015年全国科普日主场活动暨第二届青少年科技节活动启动仪式 -G/科学普及,G/知识宣传|,|肥西县紫蓬镇举办慢性病健康知识进社区活动 基层组织 -G/科学普及,G/知识宣传|,|肥西县紫蓬镇举行2019年全国科普日”启动仪式暨宣传活动 -G/知识与技能普及培训|,|肥西县紫蓬镇举办特色种养业技术培训班 -G/科学普及|,|肥西县三河镇举行2019年全国科普日启动仪式 -G/科学普及|,|肥西县科普讲师团专家走进三河镇 -G/科学普及,G/知识宣传|,|合肥市肥西县紫蓬镇开展健康科普知识竞赛活动 -G/科学普及|,|肥西县上派镇肥光社区开展暑期科普进社区”活动 -G/知识与技能普及培训|,|肥西县花木协会举办乡村企业家实用人才培训班 -G/科学普及|,|肥西县派河社区开展暑期科普兴趣班” -G/全民科学素质工作领导小组会议|,|合肥市召开2019年全民科学素质工作领导小组会议 -G/科学普及,G/知识宣传|,|肥西县柿树岗乡科协开展禁毒科普宣讲活动 -G/专家乡村学堂讲科普|,|百名专家乡村学堂讲科普”活动走进三河镇中心学校 -G/科学普及,G/知识宣传|,|肥西县上派镇科协开展世界地球日”科普主题活动 -G/科学普及|,|肥西县科协开展2019年科普赶集”活动 
-G/科普基地建设|,|肥西县地震科普馆通过验收 -G/科学普及|,|肥西县上派镇科协开展农村少儿爱科学”科普活动 -G/科学普及|,|肥西县南郢社区开展科普系列活动 -G/科普基地建设|,|肥西县科协开展科普画廊验收工作 -G/科学普及,G/知识宣传|,|肥西县上派镇紫蓬社区开展主题科普活动 -G/科学普及,G/知识讲座|,|肥西县科协举办安徽省科协科学传播专家团全省巡讲活动暨肥西县科协委员读书班 -G/科技辅导员培训|,|肥西县科协举办青少年科技创新大赛辅导员培训班 -G/科技辅导员培训|,|庐江县食用菌协会培训农村食用菌特色循环种植技术扶贫实用人才 -G/科学普及|,|肥西县开展2018年全国科普日启动仪式暨主场活动 -G/知识与技能普及培训|,|肥西县举办苗木花卉产业实用人才技能提升培训班 -G/科学普及,G/知识讲座|,|肥东县长临河镇苗木协会开展科普大讲堂活动 -G/知识与技能普及培训|,|肥西县新型职业农民水稻产业培训班开班 -G/知识与技能普及培训|,|关于转发中国科协科普部关于2017年科普人员培训班报名的通知的通知 -G/科技活动周|,|肥西县科协组织留守儿童赴中科大参加科技周活动 -G/知识与技能普及培训|,|肥西县举办企业创新方法培训活动 -G/科技馆|,|关于做好2017年度中国流动科技馆巡展工作的通知 -G/科普基地建设|,|肥西县紫蓬山管委会科协推动科普画廊建设 -G/知识与技能普及培训|,|肥东县长临河镇苗木协会举办2016年秋季苗木专业技术培训班 -G/科学普及|,|肥西县养猪协会开展2016年全国科普日活动 -G/知识讲座|,|肥西县举办领导干部大讲堂 -G/科技辅导员培训|,|肥西县珍稀食用菌协会开展农村实用人才培训 -G/知识讲座|,|肥西县苗木信息与造型技术大讲堂开班 -G/知识宣传,G/科学普及|,|肥西县科协开展秸秆禁烧”科普宣传 -G/科普基地建设|,|肥西县科协规范社区科普大学教学点建设 -G/专家乡村学堂讲科普|,|百名专家乡村学堂讲科普”走进肥西长镇中学 -G/科普基地建设|,|肥西县科协推进社区科普大学建设 -G/科普志愿活动|,|肥西县科协学雷锋志愿服务活动”启动 -G/科学普及|,|肥东县白龙镇举办2019年全国科普日科普进校园”活动 -G/科学普及|,|肥东县长临河镇开展2019年全国科普日活动 -G/知识讲座|,|肥东县长临社区科普大学举办中老年营养与膳食讲座 -G/知识宣传|,|肥东县元疃镇开展禁毒科普宣传活动 -G/知识讲座|,|肥东县白龙镇科协开展畜禽养殖业病害防治知识讲座 -G/科学普及|,|肥东县科协召开科普工作推进会 -G/科学普及|,|肥东县光大社区开展趣味小实验”儿童科普活动 -G/科学普及,G/知识讲座|,|肥东县科普大讲堂走进白龙镇双庙社区 -H/精准扶贫,H/座谈会|,|肥西县科协召开扶贫对口联系工作座谈会 -H/走访调研|,|杭州市科协来合肥市调研科普工作 -H/精准扶贫|,|精准扶贫健康科普进乡村活动仪式在肥西丰乐镇举办 -H/精准扶贫|,|合肥市科协到临泉阜南对接科技助力精准扶贫工程 -H/脱贫|,|霍邱县和肥东县科协为贫困学生开展科学文化之旅及农村少儿爱科学活动 -H/脱贫,H/组织专家义诊|,|省医学保健养生研究会党委组织开展健康扶贫义诊活动 -H/帮扶|,|肥东县白龙镇科协为果蔬种植户送防寒保收秘诀” -H/走访调研|,|肥东县督查调研乡镇科协工作 -H/走访调研|,|肥东县科协部署农技协专项资金使用管理自查自纠和乡镇科协督查调研工作 -H/精准扶贫|,|肥东科协助推企业科技精准扶贫 -H/慰问|,|肥东县老科协慰问有突出贡献的优秀老科技工作者 -H/调查|,|肥东县开展青少年科学调查体验活动 -H/座谈会|,|巢湖市科协组织召开礼赞共和国 智慧新生活”科技工作者座谈会 -H/走访调研|,|魏军锋到巢湖市科协调研 -H/座谈会|,|合肥市科协召开学会工作座谈会 -H/慰问|,|庐江县开展走访慰问科技工作者代表活动 -H/组织专家义诊|,|全国第二十四届肿瘤防治抗癌周大型义诊活动庐江县站 -H/精准扶贫|,|庐江县食用菌协会召开精准扶贫工作推进会 -H/脱贫|,|庐江县科协开展脱贫攻坚工作 -H/慰问|,|庐江县食用菌协会开展春节慰问困难会员活动 -H/精准扶贫|,|庐江县食用菌协会精准扶贫工作受合肥市政协领导高度肯定 -H/灾后农业生产自救|,|庐江县食用菌协会积极开展灾后科普服务 -H/知识宣传,H/灾后农业生产自救|,|庐江县郭河镇科协积极开展灾后科普宣传 -H/慰问|,|长丰县领导慰问科技工作者代表 -H/慰问|,|蔡士祥到长丰县岗集镇看望省科协挂职干部慰问困难党员 -H/座谈会|,|长丰县召开青少年科技创新工作座谈会 
-H/脱贫|,|长丰县科协送科技到包联贫困户 -H/走访调研|,|长丰县政协开展科普工作专题调研活动 -H/帮扶|,|王海彦赴长丰县岗集镇调研江淮分水岭对口帮扶工作 -H/走访调研|,|王洵赴长丰县岗集镇调研江淮分水岭综合开发治理工作 -H/帮扶|,|六安市裕安区科协赴合肥市包河区科技局开展扶贫结对帮扶活动 -H/调查|,|2018年芜湖市青少年科学调查体验活动启动 -H/调查|,|芜湖县赵桥小学举行2018年青少年科学调查体验活动启动仪式 -H/走访调研|,|中国科协农技中心农技协发展处来皖调研芜湖农技协工作 -H/精准扶贫|,|芜湖市科协发挥农技协优势科技助力精准扶贫 -H/调查|,|芜湖市2016年全国青少年科学调查体验活动启动仪式举行 -H/组织专家义诊|,|无为县护理学会开展纪念512国际护士节大型义诊活动 -H/帮扶|,|安庆市望江县科协赴芜湖市镜湖区对接帮扶工作 -H/精准扶贫|,|科技助力 精准扶贫” 合肥蜀山科协精准科普进寿县 -H/慰问|,|五河县领导看望慰问优秀科技工作者代表 -H/调查|,|桐城市东关小学2019青少年科学调查体验活动启动 -H/扶贫|,|桐城创新机制 提升科技扶贫实效 -H/慰问|,|桐城市领导走访慰问基层一线科技工作者 -H/走访调研|,|中国科协科普部基层处调研安庆市科普信息化落地应用工作 -H/走访调研|,|桐城市科协组队赴浙江余姚考察学习 -H/脱贫|,|桐城市科协召开脱贫攻坚推进会 -H/脱贫|,|桐城市科协党员活动日”开展脱贫攻坚入户走访 diff --git a/data/valid.csv b/data/valid.csv deleted file mode 100644 index eefe754..0000000 --- a/data/valid.csv +++ /dev/null @@ -1,63 +0,0 @@ -label|,|ques -A/十九大,A/贯彻学习|,|桐城市科协召开学习贯彻党的十九大精神专题会 -A/十九大|,|桐城市科协组织收看党的十九大开幕式 -A/组织党员干部观看红色电影|,|桐城市科协组织机关全体党员干部观看榜样 -A/传达学习|,|桐城市科协传达学习省科协九届六次全委会议精神 -A/主题活动|,|宿松县开展低碳生活绿色出行”主题宣传活动 -A/代表大会|,|宿松县科协第三次代表大会召开 -A/代表大会|,|肥西县桃花镇召开科协第三次代表大会 -A/年度民主生活会|,|怀宁县科协党组召开讲重作”专题警示教育民主生活会 -A/组织党员干部参观|,|怀宁县科协组织党员科技工作者服务团开展听民声送技术活动 -B/会员大会|,|庐江县举办茶叶协会二届二次会员大会暨庐江县茶叶公共品牌培训会 -B/全委扩大会议|,|庐江县科协四届六次全委会议召开 -B/全委扩大会议|,|长丰县科协召开三届二次全委会议 -B/换届|,|宿松县召开科协系统深化改革和县乡科协换届工作部署会 -B/全委扩大会议|,|濉溪县科协召开四届九次全委扩大会议 -B/群团工作会议|,|全椒县科协认真贯彻落实全市群团工作者培训班会议精神 -B/全委扩大会议|,|来安县科协召开四届二次全委扩大会议 -B/群团工作会议|,|定远县科协学习贯彻省委群团工作会议精神 -C/文明实践科技与科普服务平台|,|安徽省科协 安徽省文明办关于开展新时代文明实践中心科技志愿服务工作的通知 -C/新时代文明实践科技与科普服务平台,C/志愿|,|宿松县新时代文明实践中心科技志愿服务队开展水源垃圾清理活动 -C/展览献爱心|,|太湖县中学生参加 2017年参观科技展览有奖征文暨科技夏令营”获佳绩 -D/示范|,|天长市科普示范基地首家农业院士工作站挂牌成立 -D/示范|,|明光市开展防震减灾科普示范学校评选活动 -D/表彰,D/优秀人才评选,D/先进|,|全椒县评选表彰基层科普行动计划”全民科学素质工作”先进集体和先进个人 -D/先进|,|定远县科协获评全县六五”普法依法治理工作先进单位 -D/表彰|,|马鞍山市花山区优秀科技工作者表彰会召开 -D/表彰|,|和县科协社会组织党建工作获表彰 -D/示范|,|和县科协组织召开全县水稻麦茬免耕直播千亩示范现场会 -D/示范|,|当涂县召开创建省科普示范县工作调度会 -D/示范|,|濉溪县科协举办优秀学术论文颁奖农村科普示范基地授牌仪式 -D/表彰|,|颍上县召开2015年全国农民科学素质网络知识竞赛总结表彰会 -D/表彰|,|天长市举办第五届青少年科技创新市长奖”表彰大会 -D/表彰|,|天长市委市政府表彰第二届千秋英才奖” -E/竞赛|,|肥西县首届青少年信息学竞赛圆满结束 -E/竞赛|,|肥东县举办第二届青少年机器人竞赛 
-F/调研指导|,|祁门县组织科技工作队下乡进村指导农民抗灾自救 -F/科技创新|,|祁门县2016年中小学生科技创新大赛举办 -F/科技创新|,|黄山市屯溪区2019年青少年科技创新大赛落下帷幕 -F/科技创新|,|庐江县第五届青少年科技创新县长奖暨第十一届青少年科技创新大赛颁奖大会举行 -F/科技创新|,|庐江县第四届青少年科技创新县长奖暨第十届青少年科技创新大赛颁奖会召开 -F/科技创新|,|庐江县科协举办2016年度科普知识科技创新进校园”巡展活动 -F/科技节|,|黄山市黄山区举行2015年全国科普日主场活动暨第二届青少年科技节活动启动仪式 -G/科学普及,G/知识宣传|,|肥西县紫蓬镇举办慢性病健康知识进社区活动 基层组织 -G/科学普及,G/知识宣传|,|肥西县紫蓬镇举行2019年全国科普日”启动仪式暨宣传活动 -G/知识与技能普及培训|,|肥西县紫蓬镇举办特色种养业技术培训班 -G/科学普及|,|肥西县三河镇举行2019年全国科普日启动仪式 -G/科学普及|,|肥西县科普讲师团专家走进三河镇 -G/科学普及,G/知识宣传|,|合肥市肥西县紫蓬镇开展健康科普知识竞赛活动 -G/科学普及|,|肥西县科协开展2019年科普赶集”活动 -G/科普基地建设|,|肥西县地震科普馆通过验收 -G/科学普及|,|肥西县上派镇科协开展农村少儿爱科学”科普活动 -G/科学普及|,|肥西县南郢社区开展科普系列活动 -H/组织专家义诊|,|无为县护理学会开展纪念512国际护士节大型义诊活动 -H/帮扶|,|安庆市望江县科协赴芜湖市镜湖区对接帮扶工作 -H/精准扶贫|,|科技助力 精准扶贫” 合肥蜀山科协精准科普进寿县 -H/慰问|,|五河县领导看望慰问优秀科技工作者代表 -H/调查|,|桐城市东关小学2019青少年科学调查体验活动启动 -H/扶贫|,|桐城创新机制 提升科技扶贫实效 -H/慰问|,|桐城市领导走访慰问基层一线科技工作者 -H/走访调研|,|中国科协科普部基层处调研安庆市科普信息化落地应用工作 -H/走访调研|,|桐城市科协组队赴浙江余姚考察学习 -H/脱贫|,|桐城市科协召开脱贫攻坚推进会 -H/脱贫|,|桐城市科协党员活动日”开展脱贫攻坚入户走访 \ No newline at end of file diff --git a/data_preprocess/data_excel2csv.py b/data_preprocess/data_excel2csv.py index 570f27b..e87a274 100644 --- a/data_preprocess/data_excel2csv.py +++ b/data_preprocess/data_excel2csv.py @@ -4,62 +4,203 @@ # @author : zh-atom # @function: -from keras_textclassification.data_preprocess.text_preprocess import load_json, save_json, txt_read -from keras_textclassification.conf.path_config import path_model_dir -from keras_textclassification.conf.path_config import path_train, path_valid, path_label, path_root -from tqdm import tqdm -import pandas as pd -import numpy as np -import json import os import re +import random +import jieba +import json +import pandas as pd +import numpy as np +from gensim.models import Word2Vec +from conf.path_config import path_train, path_valid, path_label, path_tests, path_category, path_dataset, \ + path_edata, path_embedding_vector_word2vec_word, path_embedding_random_word, path_embedding_vector_word2vec_word_bin, \ + path_embedding_random_char, 
path_embedding_vector_word2vec_char_bin, path_embedding_vector_word2vec_char, path_embedding_user_dict + +str_split = '|,|' + +class preprocess_excel_data: + def __init__(self): + self.corpus = [] + self.corpus_labels = [] + self.corpus_titles = [] -def removePunctuation(content): - """ - 文本去标点 - """ - punctuation = r"~!@#$%^&*()_+`{}|\[\]\:\";\-\\\='<>?,.,。、《》?;:‘“{【】}|、!@#¥%……&*()——+=-" - content = re.sub(r'[{}]+'.format(punctuation), '', content) - - if content.startswith(' ') or content.endswith(' '): - re.sub(r"^(\s+)|(\s+)$", "", content) - return content.strip() - -def excel2csv(): - labels = [] - trains = ['label|,|ques'] - data = pd.read_excel(os.path.dirname(path_train)+'/02-anhui.xlsx') - data = np.array(data) - data = data.tolist() - for s_list in data: - print(s_list) - label_tmp = removePunctuation(s_list[5]) - if ' ' in label_tmp: - train_tmp = [] - label_tmp = label_tmp.split(' ') + def removePunctuation(self, content): + """ + 文本去标点 + """ + punctuation = r"~!@#$%^&*()_+`{}|\[\]\:\";\-\\\='<>?,.,。、《》?;:‘""“”{【】}|、!@#¥%……&*()——+=- " + content = re.sub(r'[{}]+'.format(punctuation), '', content) + + if content.startswith(' ') or content.endswith(' '): + re.sub(r"^(\s+)|(\s+)$", "", content) + return content.strip() + + def list_all_files(self, rootdir): + import os + _files = [] + # 列出文件夹下所有的目录与文件 + list_file = os.listdir(rootdir) + + for i in range(0, len(list_file)): + # 构造路径 + path = os.path.join(rootdir, list_file[i]) + # 判断路径是否是一个文件目录或者文件 + # 如果是文件目录,继续递归 + if os.path.isdir(path): + _files.extend(self.list_all_files(path)) + if os.path.isfile(path): + _files.append(path) + return _files + + def label_check(self, category, label): + # 读 category2labels.json文件,校验 类别-标签 是否匹配 + with open(path_category, 'r', encoding='utf-8') as f_c2l: + c2l_json = json.load(f_c2l) + + if ' ' in label: + label_tmp = label.split(' ') for i in label_tmp: - label = removePunctuation(s_list[4]) + '/' + removePunctuation(i) - labels.append(label) - train_tmp.append(label) - 
train = ','.join(train_tmp) + '|,|' + removePunctuation(s_list[3]) - trains.append(train) - else: - label = removePunctuation(s_list[4]) + '/' + removePunctuation(s_list[5]) - labels.append(label) - trains.append(label + '|,|' + removePunctuation(s_list[3])) - - # 生成 label 文件 - with open(path_label, 'w', encoding='utf-8') as f_label: - labels = list(set(labels)) - labels.sort(reverse=False) - for line in labels: - f_label.write(line + '\n') - f_label.close() - - # 生成 train.csv 文件 - with open(path_train, 'w', encoding='utf-8') as f_train: - for line in trains: - f_train.write(line + '\n') + if i not in c2l_json[category]: + return False + + elif label not in c2l_json[category]: + return False + return True + + def gen_jieba_user_dict(self, path): + lable_dict = [] + for lines in self.corpus_labels: + for len in lines: + lable_dict.append(len) + lable_dict = list(set(lable_dict)) + with open(path, 'w', encoding='utf-8') as f_path: + for line in lable_dict: + f_path.write(line + '\n') + f_path.close() + return lable_dict + + def excel2csv(self): + labels = [] + trains = [] + data = [] + edata =[] + files = self.list_all_files(os.path.dirname(path_dataset)) + for file in files: + if file.startswith('0') or file.endswith('.xlsx'): + print('Will read execel file:' + file) + data += np.array(pd.read_excel(file)).tolist() + + for s_list in data: + # print(s_list) + raw_label = str(s_list[5]) + raw_title = str(s_list[3]) + raw_category = str(s_list[4]) + cov_label = self.removePunctuation(raw_label) + cov_title = self.removePunctuation(raw_title) + cov_category = raw_category.strip() + + # 跳过无效数据 + if 'nan' in raw_label or 'nan' in raw_title or 'nan' in raw_category: + continue + # 跳过 分类和标签 不匹配的数据 + if self.label_check(cov_category, cov_label) == False: + edata.append(str(s_list[0]) + str_split + cov_category + str_split + cov_label + str_split + cov_title) + continue + + label_tmp = cov_label.replace('/', ' ') # 去除字母标签分类的 ‘/’ + label_tmp = re.sub(r' ', ' ', label_tmp) # 
去除标签里面的双空格 + + # 将 label 和 title 都加入语料库 + self.corpus_labels.append(list(label_tmp.split(' '))) + # jieba.suggest_freq('十八大', True) #修改词频,使其不能分离 + # self.corpus.append(list(jieba.cut(cov_title, cut_all=False, HMM=False))) + self.corpus_titles.append(cov_title) + + # 处理多标签的情况 + if ' ' in label_tmp: + label_tmp = label_tmp.split(' ') + train_tmp = [] + for i in label_tmp: + labels.append(i) + train_tmp.append(i) + trains.append(','.join(train_tmp) + str_split + cov_title) + else: + labels.append(cov_label) + trains.append(cov_label + str_split + cov_title) + + label_dict = self.gen_jieba_user_dict(path_embedding_user_dict) + jieba.load_userdict(path_embedding_user_dict) # 添加自定义词典 + + for line in self.corpus_titles: + self.corpus.append(list(jieba.cut(line, cut_all=False, HMM=False))) + self.corpus.append(label_dict) + + # 生成 label 文件 + with open(path_label, 'w', encoding='utf-8') as f_label: + labels = list(set(labels)) # 去重 + labels.sort(reverse=False) # 排序 + for line in labels: + f_label.write(line + '\n') + f_label.close() + + # 生成 train.csv vaild.csv test.csv 文件 + f_train = open(path_train, 'w', encoding='utf-8') + f_valid = open(path_valid, 'w', encoding='utf-8') + f_tests = open(path_tests, 'w', encoding='utf-8') + random.shuffle(trains) + f_valid.write('label'+ str_split + 'ques' + '\n') + f_train.write('label'+ str_split + 'ques' + '\n') + f_tests.write('label'+ str_split + 'ques' + '\n') + for i in range(len(trains)): + print(trains[i]) + # 拆分训练集、验证集、测试集 + if i % 5 == 0: + f_valid.write(trains[i] + '\n') + #elif i % 11 == 0: + # f_tests.write(trains[i] + '\n') + else: + f_train.write(trains[i] + '\n') + f_valid.close() f_train.close() + f_tests.close() + + # 生成有误数据集 error_data.csv 文件 + f_edata = open(path_edata, 'w', encoding='utf-8') + f_edata.write('province,category,label,ques' + '\n') + for i in range(len(edata)): + f_edata.write(edata[i] + '\n') + f_edata.close() + + def gen_vec(self): + print(self.corpus) + word_list = [] + char_list = [] + # 生成 
word2vec 预训练 文件 + for line in self.corpus: + for word in line: + word_list.append(word) + for char in word: + char_list.append(char) + + word_list = list(set(word_list)) + char_list = list(set(char_list)) + with open(path_embedding_random_word, 'w', encoding='utf-8') as f_word_vec_bin: + for line in word_list: + f_word_vec_bin.write(line + '\n') + f_word_vec_bin.close() + + with open(path_embedding_random_char, 'w', encoding='utf-8') as f_char_vec_bin: + for line in char_list: + f_char_vec_bin.write(line + '\n') + f_char_vec_bin.close() + + print("start to gen word vec file") + # 嵌入参数: sg=> 0:CBOW 1:SKip-Gram, sentences = word_list + model_word = Word2Vec(corpus_file=path_embedding_random_word, size=300, window=5, min_count=1, workers=4) + model_word.wv.save_word2vec_format(path_embedding_vector_word2vec_word_bin, binary=True) + model_word.wv.save_word2vec_format(path_embedding_vector_word2vec_word, binary=False) - return None + print("start to gen word vec file") + model_word = Word2Vec(corpus_file=path_embedding_random_char, size=300, window=5, min_count=1, workers=4) + model_word.wv.save_word2vec_format(path_embedding_vector_word2vec_char_bin, binary=True) + model_word.wv.save_word2vec_format(path_embedding_vector_word2vec_char, binary=False) diff --git a/data_preprocess/data_split.py b/data_preprocess/data_split.py index e7eb454..4b19fb9 100644 --- a/data_preprocess/data_split.py +++ b/data_preprocess/data_split.py @@ -9,15 +9,14 @@ import pathlib import sys import os -project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) -sys.path.append(project_path) from sklearn.model_selection import StratifiedKFold import pandas as pd import numpy as np import random +from data_preprocess.text_preprocess import txt_write, txt_read -from keras_textclassification.data_preprocess.text_preprocess import txt_write, txt_read - +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) def 
data_kfold(path_org_data, k_fold_split=10, path_save_dir=""): """ @@ -123,7 +122,7 @@ def shuffle_corpus(corpus): if __name__ == '__main__': - from keras_textclassification.conf.path_config import path_root + from conf.path_config import path_root filepath = path_root + "/data/baidu_qa_2019/baike_qa_train.csv" # 原始语料 k_fold_split = 10 data_kfold(path_org_data=filepath, k_fold_split=10, path_save_dir=path_root+ "/data/baidu_qa_2019/") diff --git a/data_preprocess/generator_preprocess.py b/data_preprocess/generator_preprocess.py index 9e3a8b3..cadee12 100644 --- a/data_preprocess/generator_preprocess.py +++ b/data_preprocess/generator_preprocess.py @@ -5,11 +5,7 @@ # @function: -from keras_textclassification.data_preprocess.text_preprocess import load_json, save_json, txt_read -from keras_textclassification.conf.path_config import path_model_dir -from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid, path_root -from tqdm import tqdm -import pandas as pd +from data_preprocess.text_preprocess import load_json, save_json import numpy as np import json import os @@ -22,7 +18,7 @@ class PreprocessGenerator: def __init__(self, path_model_dir): self.l2i_i2l = None self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + self.path_fast_text_model_l2i_i2l = path_l2i_i2l if os.path.exists(self.path_fast_text_model_l2i_i2l): self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) @@ -143,7 +139,7 @@ class PreprocessSimGenerator: def __init__(self, path_model_dir): self.l2i_i2l = None self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + self.path_fast_text_model_l2i_i2l = path_l2i_i2l if os.path.exists(self.path_fast_text_model_l2i_i2l): self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) diff --git 
a/data_preprocess/text_preprocess.py b/data_preprocess/text_preprocess.py index 230d3bc..52ad48a 100644 --- a/data_preprocess/text_preprocess.py +++ b/data_preprocess/text_preprocess.py @@ -5,21 +5,18 @@ # @function :data utils of text classification -# from keras_textclassification.conf.path_config import path_model_dir -# path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' -# path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' +from conf.path_config import path_l2i_i2l from collections import Counter from tqdm import tqdm import pandas as pd import numpy as np import random -# import jieba import json import re import os -__all__ = ["PreprocessText", "PreprocessTextMulti", "PreprocessSim"] +__all__ = ["PreprocessText", "PreprocessTextMulti"] __tools__ = ["txt_read", "txt_write", "extract_chinese", "read_and_process", "preprocess_label_ques", "save_json", "load_json", "delete_file", @@ -49,7 +46,7 @@ def txt_read(file_path, encode_type='utf-8'): return list_line -def txt_write(list_line, file_path, type='w', encode_type='utf-8'): +def txt_write(list_line: object, file_path: object, type: object = 'w', encode_type: object = 'utf-8') -> object: """ txt写入list文件 :param listLine:list, list文件,写入要带"\n" @@ -114,19 +111,17 @@ def preprocess_label_ques(path): x_y.append(line_y+','+line_x+'\n') return x_y - def save_json(jsons, json_path): """ 保存json, - :param json_: json + :param json_: json :param path: str :return: None """ with open(json_path, 'w', encoding='utf-8') as fj: - fj.write(json.dumps(jsons, ensure_ascii=False)) + fj.write(json.dumps(jsons, ensure_ascii=False, sort_keys=True, indent=2)) fj.close() - def load_json(path): """ 获取json,只取第一行 @@ -134,7 +129,7 @@ def load_json(path): :return: json """ with open(path, 'r', encoding='utf-8') as fj: - model_json = json.loads(fj.readlines()[0]) + model_json = json.loads(fj.read()) return model_json @@ -201,7 +196,7 @@ class PreprocessText: def __init__(self, path_model_dir): self.l2i_i2l = 
None self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + self.path_fast_text_model_l2i_i2l = path_l2i_i2l if os.path.exists(self.path_fast_text_model_l2i_i2l): self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) @@ -311,7 +306,7 @@ class PreprocessTextMulti: def __init__(self, path_model_dir): self.l2i_i2l = None self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' + self.path_fast_text_model_l2i_i2l = path_l2i_i2l if os.path.exists(self.path_fast_text_model_l2i_i2l): self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) @@ -369,7 +364,7 @@ def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuf ques, label = ques[indexs].tolist(), label[indexs].tolist() if not os.path.exists(self.path_fast_text_model_l2i_i2l): - from keras_textclassification.conf.path_config import path_label + from conf.path_config import path_label byte_multi_news_label = txt_read(path_label) byte_multi_news_label = [i.strip().upper() for i in byte_multi_news_label] @@ -391,7 +386,6 @@ def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuf l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) len_label_set = len(l2i_i2l['l2i']) - x = [] print("ques to index start!") for i in tqdm(range(len_ql)): @@ -434,427 +428,4 @@ def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuf return x_all, y_ else: x_, y_ = np.array(x), np.array(label_multi_list) - return x_, y_ - - -class PreprocessSim: - """ - 数据预处理, 输入为csv格式, [label,ques] - """ - def __init__(self, path_model_dir): - self.l2i_i2l = None - self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' - if os.path.exists(self.path_fast_text_model_l2i_i2l): - self.l2i_i2l = 
load_json(self.path_fast_text_model_l2i_i2l) - - def prereocess_idx(self, pred, digits=5): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_i2l = {} - i2l = self.l2i_i2l['i2l'] - for i in range(len(pred)): - pred_i2l[i2l[str(i)]] = round(float(pred[i]), digits) - pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] - return pred_i2l_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def prereocess_pred_xid(self, pred): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_l2i = {} - l2i = self.l2i_i2l['l2i'] - for i in range(len(pred)): - pred_l2i[pred[i]] = l2i[pred[i]] - pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] - return pred_l2i_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True): - data = pd.read_csv(path) - ques_1 = data['sentence1'].tolist() - ques_2 = data['sentence2'].tolist() - label = data['label'].tolist() - ques_1 = [str(q1).upper() for q1 in ques_1] - ques_2 = [str(q2).upper() for q2 in ques_2] - - label = [str(l).upper() for l in label] - if shuffle: - ques_1 = np.array(ques_1) - ques_2 = np.array(ques_2) - label = np.array(label) - indexs = [ids for ids in range(len(label))] - random.shuffle(indexs) - ques_1, ques_2, label = ques_1[indexs].tolist(), ques_2[indexs].tolist(), label[indexs].tolist() - # 如果label2index存在则不转换了 - if not os.path.exists(self.path_fast_text_model_l2i_i2l): - label_set = set(label) - count = 0 - label2index = {} - index2label = {} - for label_one in label_set: - label2index[label_one] = count - index2label[count] = label_one - count = count + 1 - - l2i_i2l = {} - l2i_i2l['l2i'] = label2index - l2i_i2l['i2l'] = index2label - save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) - else: - l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) - - len_ql = int(rate * len(label)) - if len_ql <= 500: # 
sample时候不生效,使得语料足够训练 - len_ql = len(label) - - x = [] - print("ques to index start!") - for i in tqdm(range(len_ql)): - que_1 = ques_1[i] - que_2 = ques_2[i] - que_embed = embed.sentence2idx(text=que_1, second_text=que_2) - x.append(que_embed) # [[], ] - label_zo = [] - print("label to onehot start!") - label_len_ql = label[0:len_ql] - for j in tqdm(range(len_ql)): - label_one = label_len_ql[j] - label_zeros = [0] * len(l2i_i2l['l2i']) - label_zeros[l2i_i2l['l2i'][label_one]] = 1 - label_zo.append(label_zeros) - - if embedding_type in ['bert', 'albert']: - x_, y_ = np.array(x), np.array(label_zo) - x_1 = np.array([x[0] for x in x_]) - x_2 = np.array([x[1] for x in x_]) - x_all = [x_1, x_2] - return x_all, y_ - - -class PreprocessSimCCKS2020baidu: - """ - 数据预处理, 输入为csv格式, [label,ques] - """ - def __init__(self, path_model_dir): - self.l2i_i2l = None - self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' - if os.path.exists(self.path_fast_text_model_l2i_i2l): - self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) - - def prereocess_idx(self, pred): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_i2l = {} - i2l = self.l2i_i2l['i2l'] - for i in range(len(pred)): - pred_i2l[i2l[str(i)]] = pred[i] - pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] - return pred_i2l_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def prereocess_pred_xid(self, pred): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_l2i = {} - l2i = self.l2i_i2l['l2i'] - for i in range(len(pred)): - pred_l2i[pred[i]] = l2i[pred[i]] - pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] - return pred_l2i_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def preprocess_label_ques_to_idx(self, embedding_type, path, embed, - rate=1, shuffle=True, graph=None): - if 
"json" in path: - datas = txt_read(path) - ques_1 = [] - ques_2 = [] - label = [] - offset = [] - mention = [] - for data_str in datas: - data = json.loads(data_str) - ques_1 += [data['sentence1']] - ques_2 += [data['sentence2']] - mention += [data['mention']] - label += [data['label']] - offset += [data['offset']] - elif "csv" in path: - data = pd.read_csv(path) - ques_1 = data['sentence1'].tolist() - ques_2 = data['sentence2'].tolist() - label = data['label'].tolist() - offset = data['offset'].tolist() - - ques_1 = [str(q1).upper() for q1 in ques_1] - ques_2 = [str(q2).upper() for q2 in ques_2] - - # label = [str(l).upper() for l in label] - label = [str(l) for l in label] - if shuffle: - ques_1 = np.array(ques_1) - ques_2 = np.array(ques_2) - label = np.array(label) - mention = np.array(mention) - offset = np.array(offset) - - indexs = [ids for ids in range(len(label))] - random.shuffle(indexs) - ques_1 = ques_1[indexs].tolist() - ques_2 = ques_2[indexs].tolist() - label = label[indexs].tolist() - mention = mention[indexs].tolist() - offset = offset[indexs].tolist() - # 如果label2index存在则不转换了 - if not os.path.exists(self.path_fast_text_model_l2i_i2l): - label_set = set(label) - count = 0 - label2index = {} - index2label = {} - for label_one in label_set: - label2index[label_one] = count - index2label[count] = label_one - count = count + 1 - - l2i_i2l = {} - l2i_i2l['l2i'] = label2index - l2i_i2l['i2l'] = index2label - save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) - else: - l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) - - len_ql = int(rate * len(label)) - if len_ql <= 1: # sample时候不生效,使得语料足够训练 - len_ql = len(label) - - x = [] - print("ques to index start!") - for i in tqdm(range(len_ql)): - que_1 = ques_1[i] - que_2 = ques_2[i] - mention_1 = mention[i] - # que_embed = embed.sentence2idx(text=que_1, second_text=que_2) - # x.append(que_embed) # [[], ] - offset_i = int(offset[i]) - # ques_entity = que_1 + "##" + que_1[offset_i+len(que_2):] - # 
ques_entity = que_1 - # que_embed1 = embed.sentence2idx(text=que_1, second_text=que_2) - if embedding_type in ['bert', 'albert']: - ########################################1111111############## - # [input_id, input_type_id] = que_embed - # input_entity_mask = [0] * len(input_id) - # input_entity_mask[offset_i:offset_i+len(que_2)] = [1] * len(que_2) - # # x.append(que_embed) # [[], ] - # x.append([input_id, input_type_id, input_entity_mask]) - # # x.append([input_id, input_type_id, input_entity_mask, offset_i]) - ########################################2222222指针网络###################################### - # [input_id, input_type_id] = que_embed - # input_start_mask = [0] * len(input_id) - # input_start_mask[offset_i] = 1 - # input_end_mask = [0] * len(input_id) - # input_end_mask[offset_i + len(mention_1) - 1] = 1 - # x.append([input_id, input_type_id, input_start_mask, input_start_mask]) - ########################################分开两个句子################################################### - que_embed_1 = embed.sentence2idx(text=que_1) - # que_embed_1 = [que[:54] for que in que_embed_1] - - que_embed_2 = embed.sentence2idx(text=que_2) - # que_embed_2 = [que[:256-54] for que in que_embed_2] - try: - """ques1""" - [input_id_1, input_type_id_1, input_mask_1] = que_embed_1 - input_start_mask_1 = [0] * len(input_id_1) - input_start_mask_1[offset_i] = 1 - input_end_mask_1 = [0] * len(input_id_1) - input_end_mask_1[offset_i+len(mention_1)-1] = 1 - input_entity_mask_1 = [0] * len(input_id_1) - input_entity_mask_1[offset_i:offset_i+len(mention_1)] = [1] * len(mention_1) - """ques2""" - [input_id_2, input_type_id_2, input_mask_2] = que_embed_2 - kind_2 = [0] * len(input_type_id_2) - que_2_sp = que_2.split("|") - que_2_sp_sp = que_2_sp[0].split(":") - kind_2_start = len(que_2_sp_sp[0]) - 1 - kind_2_end = kind_2_start + len(que_2_sp_sp[1]) - 1 - kind_2[kind_2_start:kind_2_end] = [1] * (kind_2_end-kind_2_start) - kind_21 = [0] * len(input_type_id_2) - if "标签" in que_2_sp[1]: - 
que_21_sp_sp = que_2_sp[1].split(":") - kind_21_start = len(que_2_sp[0]) + len(que_21_sp_sp[0]) - 1 - kind_21_end = len(que_2_sp[0]) + len(que_21_sp_sp[0]) + len(que_21_sp_sp[1]) - 1 - kind_21[kind_21_start:kind_21_end] = [1] * (kind_21_end - kind_21_start) - except Exception as e: - print(str(e)) - gg = 0 - - x.append([input_id_1, input_type_id_1, input_mask_1, input_start_mask_1, input_end_mask_1, input_entity_mask_1, - input_id_2, input_type_id_2, input_mask_2, kind_2, kind_21]) - - - elif embedding_type == 'xlnet': - if embed.trainable: - [token_input, segment_input, memory_length_input, mask_input] = que_embed - input_entity_mask = [0] * len(token_input) - input_entity_mask[offset_i:offset_i + len(que_2)] = [1] * len(que_2) - # x.append(que_embed) # [[], ] - x.append([token_input, segment_input, memory_length_input, mask_input, input_entity_mask]) - else: - [token_input, segment_input, memory_length_input] = que_embed - input_entity_mask = [0] * len(token_input) - input_entity_mask[offset_i:offset_i + len(que_2)] = [1] * len(que_2) - x.append([token_input, segment_input, memory_length_input, input_entity_mask]) - - label_zo = [] - print("label to onehot start!") - label_len_ql = label[0:len_ql] - for j in tqdm(range(len_ql)): - label_one = label_len_ql[j] - label_zeros = [0] * len(l2i_i2l['l2i']) - label_zeros[l2i_i2l['l2i'][label_one]] = 1 - label_zo.append(label_zeros) - - if embedding_type in ['bert', 'albert']: - x_, y_ = np.array(x), np.array(label_zo) - # x_1 = np.array([x[0] for x in x_]) - # x_2 = np.array([x[1] for x in x_]) - # x_3 = np.array([x[2] for x in x_]) - # x_4 = np.array([x[3] for x in x_]) - # x_all = [x_1, x_2, x_3, x_4] - x_all = [] - for i in range(len(x_[0])): - x_all.append(np.array([x[i] for x in x_])) - return x_all, y_ - elif embedding_type == 'xlnet': - x_, y_ = x, np.array(label_zo) - x_1 = np.array([x[0][0] for x in x_]) - x_2 = np.array([x[1][0] for x in x_]) - x_3 = np.array([x[2][0] for x in x_]) - x_4 = np.array([x[3][0] for 
x in x_]) - if embed.trainable: - x_5 = np.array([x[4][0] for x in x_]) - x_all = [x_1, x_2, x_3, x_4, x_5] - else: - x_all = [x_1, x_2, x_3, x_4] - return x_all, y_ - else: - x_, y_ = np.array(x), np.array(label_zo) - return x_, y_ - - -class PreprocessSimConv2019: - """ - 数据预处理, 输入为csv格式, [label,ques] - """ - def __init__(self, path_model_dir): - self.l2i_i2l = None - self.path_fast_text_model_vocab2index = path_model_dir + 'vocab2index.json' - self.path_fast_text_model_l2i_i2l = path_model_dir + 'l2i_i2l.json' - if os.path.exists(self.path_fast_text_model_l2i_i2l): - self.l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) - - def prereocess_idx(self, pred): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_i2l = {} - i2l = self.l2i_i2l['i2l'] - for i in range(len(pred)): - pred_i2l[i2l[str(i)]] = pred[i] - pred_i2l_rank = [sorted(pred_i2l.items(), key=lambda k: k[1], reverse=True)] - return pred_i2l_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def prereocess_pred_xid(self, pred): - if os.path.exists(self.path_fast_text_model_l2i_i2l): - pred_l2i = {} - l2i = self.l2i_i2l['l2i'] - for i in range(len(pred)): - pred_l2i[pred[i]] = l2i[pred[i]] - pred_l2i_rank = [sorted(pred_l2i.items(), key=lambda k: k[1], reverse=True)] - return pred_l2i_rank - else: - raise RuntimeError("path_fast_text_model_label2index is None") - - def preprocess_label_ques_to_idx(self, embedding_type, path, embed, rate=1, shuffle=True): - data = pd.read_csv(path) - # category, query1, query2, label - ques_1 = data['query1'].tolist() - category = data['category'].tolist() - ques_2 = data['query2'].tolist() - label = data['label'].tolist() - ques_1 = [str(q1).upper() for q1 in ques_1] - ques_2 = [str(q2).upper() for q2 in ques_2] - - label = [str(l).upper() for l in label] - if shuffle: - ques_1 = np.array(ques_1) - ques_2 = np.array(ques_2) - category = np.array(category) - label = np.array(label) - indexs = [ids for ids in 
range(len(label))] - random.shuffle(indexs) - ques_1, ques_2, label, category = ques_1[indexs].tolist(), ques_2[indexs].tolist(), label[indexs].tolist(), category[indexs].tolist() - # 如果label2index存在则不转换了 - if not os.path.exists(self.path_fast_text_model_l2i_i2l): - label_set = set(label) - count = 0 - label2index = {} - index2label = {} - for label_one in label_set: - label2index[label_one] = count - index2label[count] = label_one - count = count + 1 - - l2i_i2l = {} - l2i_i2l['l2i'] = label2index - l2i_i2l['i2l'] = index2label - save_json(l2i_i2l, self.path_fast_text_model_l2i_i2l) - else: - l2i_i2l = load_json(self.path_fast_text_model_l2i_i2l) - - len_ql = int(rate * len(label)) - if len_ql <= 500: # sample时候不生效,使得语料足够训练 - len_ql = len(label) - - x = [] - print("ques to index start!") - len_ques_list = [] - label_list = [] - for i in tqdm(range(len_ql)): - que_1 = ques_1[i] - que_2 = ques_2[i] - category_3 = category[i] - que_embed = embed.sentence2idx(text=category_3+":"+que_1, second_text=category_3+":"+que_2) - - # que_embed = embed.sentence2idx(text=category_3+":"+que_1, second_text=category_3+":"+que_2) - # que_embed = embed.sentence2idx(text=que_1, second_text=que_2) - x.append(que_embed) # [[], ] - len_ques_list.append(len(que_1+que_2)) - label_list.append(category_3) - len_ques_counter = Counter(len_ques_list) - label_counter = Counter(label_list) - print("长度:{}".format(dict(len_ques_counter))) - print("长度字典:{}".format(dict(len_ques_counter).keys())) - print("最大长度:{}".format(max(list(dict(len_ques_counter).keys())))) - print("类别字典:{}".format(dict(label_counter))) - label_zo = [] - print("label to onehot start!") - label_len_ql = label[0:len_ql] - for j in tqdm(range(len_ql)): - label_one = label_len_ql[j] - label_zeros = [0] * len(l2i_i2l['l2i']) - label_zeros[l2i_i2l['l2i'][label_one]] = 1 - label_zo.append(label_zeros) - - if embedding_type in ['bert', 'albert']: - x_, y_ = np.array(x), np.array(label_zo) - x_1 = np.array([x[0] for x in x_]) - x_2 = 
np.array([x[1] for x in x_]) - x_all = [x_1, x_2] - return x_all, y_ - else: - x_, y_ = np.array(x), np.array(label_zo) - - return x_, y_ - + return x_, y_ \ No newline at end of file diff --git a/data_preprocess/utils.py b/data_preprocess/utils.py new file mode 100644 index 0000000..2839249 --- /dev/null +++ b/data_preprocess/utils.py @@ -0,0 +1,61 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from shutil import copyfile + +def draw_accuracy_figure(H, output_path): + # H: history + # 准确率图形输出 + N = np.arange(0, H.epoch.__len__()) + plt.style.use("ggplot") + plt.figure(figsize=(12, 5)) + plt.subplot(1, 2, 1) + plt.title("Training Accuracy (Multi Labels)") + plt.plot(N, H.history['acc'], 'bo-', label='train') + plt.plot(N, H.history['val_acc'], 'r^:', label='test') + plt.xlabel("Epoch") + plt.ylabel("Accuracy") + plt.legend() + + plt.subplot(1, 2, 2) + plt.title("Training loss (Multi Labels)") + plt.xlabel("Epoch") + plt.ylabel("Loss") + plt.plot(N, H.history['loss'], 'bo-', label='train_loss') + plt.plot(N, H.history['val_loss'], 'r^:', label='test_loss') + plt.legend() + mkdir(output_path) + plt.savefig(output_path + '/train') + +def mkdir(path): + # 去除首位空格 + path = path.strip() + # 去除尾部 \ 符号 + path = path.rstrip("\\") + + # 判断路径是否存在 + # 存在 True + # 不存在 False + isExists = os.path.exists(path) + + # 判断结果 + if not isExists: + # 如果不存在则创建目录 + # 创建目录操作函数 + os.makedirs(path) + + print(path + ' 创建成功') + return True + else: + # 如果目录存在则不创建,并提示目录已存在 + print(path + ' 目录已存在') + return False + +def copy_file(source, target): + try: + copyfile(source, target) + except IOError as e: + print("Unable to copy file. 
%s" % e) + except: + print("Unexpected error:") + print("\nFile copy done!\n") \ No newline at end of file diff --git a/keras_layers/__init__.py b/keras_layers/__init__.py new file mode 100644 index 0000000..630f55f --- /dev/null +++ b/keras_layers/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/22 7:33 +# @author :Mo +# @function : \ No newline at end of file diff --git a/keras_layers/albert/__init__.py b/keras_layers/albert/__init__.py new file mode 100644 index 0000000..2cbc15a --- /dev/null +++ b/keras_layers/albert/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/10/14 9:46 +# @author :Mo +# @function : \ No newline at end of file diff --git a/keras_layers/albert/albert.py b/keras_layers/albert/albert.py new file mode 100644 index 0000000..5c00651 --- /dev/null +++ b/keras_layers/albert/albert.py @@ -0,0 +1,331 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/22 7:35 +# @author :TinkerMob +# @function :keras_albert_model +# @code :code from https://github.com/TinkerMob/keras_albert_model + + +from keras_adaptive_softmax import AdaptiveEmbedding, AdaptiveSoftmax +from keras_bert import get_custom_objects as get_bert_custom_objects +from keras_position_wise_feed_forward import FeedForward +from keras_layer_normalization import LayerNormalization +from keras_bert.activations.gelu_fallback import gelu +from keras_multi_head import MultiHeadAttention +from keras_bert.layers import Masked, Extract +from keras_pos_embd import PositionEmbedding +from keras_bert.backend import keras +import tensorflow as tf +import numpy as np +import json +import os + + +__all__ = [ + 'get_custom_objects', 'build_albert', + 'load_brightmart_albert_zh_checkpoint', +] + + +def get_custom_objects(): + custom_objects = get_bert_custom_objects() + custom_objects['AdaptiveEmbedding'] = AdaptiveEmbedding + custom_objects['AdaptiveSoftmax'] = AdaptiveSoftmax + return custom_objects + + +def 
build_albert(token_num, + pos_num=512, + seq_len=512, + embed_dim=128, + hidden_dim=768, + transformer_num=12, + head_num=12, + feed_forward_dim=3072, + dropout_rate=0.1, + attention_activation=None, + feed_forward_activation='gelu', + training=True, + trainable=None, + output_layers=None): + """Get ALBERT model. + See: https://arxiv.org/pdf/1909.11942.pdf + :param token_num: Number of tokens. + :param pos_num: Maximum position. + :param seq_len: Maximum length of the input sequence or None. + :param embed_dim: Dimensions of embeddings. + :param hidden_dim: Dimensions of hidden layers. + :param transformer_num: Number of transformers. + :param head_num: Number of heads in multi-head attention + in each transformer. + :param feed_forward_dim: Dimension of the feed forward layer + in each transformer. + :param dropout_rate: Dropout rate. + :param attention_activation: Activation for attention layers. + :param feed_forward_activation: Activation for feed-forward layers. + :param training: A built model with MLM and NSP outputs will be returned + if it is `True`, otherwise the input layers and the last + feature extraction layer will be returned. + :param trainable: Whether the model is trainable. + :param output_layers: A list of indices of output layers. 
+ """ + if attention_activation == 'gelu': + attention_activation = gelu + if feed_forward_activation == 'gelu': + feed_forward_activation = gelu + if trainable is None: + trainable = training + + def _trainable(_layer): + if isinstance(trainable, (list, tuple, set)): + for prefix in trainable: + if _layer.name.startswith(prefix): + return True + return False + return trainable + + # Build inputs + input_token = keras.layers.Input(shape=(seq_len,), name='Input-Token') + input_segment = keras.layers.Input(shape=(seq_len,), name='Input-Segment') + inputs = [input_token, input_segment] + + # Build embeddings + embed_token, embed_weights, embed_projection = AdaptiveEmbedding( + input_dim=token_num, + output_dim=hidden_dim, + embed_dim=embed_dim, + mask_zero=True, + trainable=trainable, + return_embeddings=True, + return_projections=True, + name='Embed-Token', + )(input_token) + embed_segment = keras.layers.Embedding( + input_dim=2, + output_dim=hidden_dim, + trainable=trainable, + name='Embed-Segment', + )(input_segment) + embed_layer = keras.layers.Add(name='Embed-Token-Segment')( + [embed_token, embed_segment]) + embed_layer = PositionEmbedding( + input_dim=pos_num, + output_dim=hidden_dim, + mode=PositionEmbedding.MODE_ADD, + trainable=trainable, + name='Embedding-Position', + )(embed_layer) + + if dropout_rate > 0.0: + dropout_layer = keras.layers.Dropout( + rate=dropout_rate, + name='Embedding-Dropout', + )(embed_layer) + else: + dropout_layer = embed_layer + embed_layer = LayerNormalization( + trainable=trainable, + name='Embedding-Norm', + )(dropout_layer) + + # Build shared transformer + attention_layer = MultiHeadAttention( + head_num=head_num, + activation=attention_activation, + name='Attention', + ) + attention_normal = LayerNormalization(name='Attention-Normal') + feed_forward_layer = FeedForward( + units=feed_forward_dim, + activation=feed_forward_activation, + name='Feed-Forward' + ) + feed_forward_normal = LayerNormalization(name='Feed-Forward-Normal') 
+ + transformed = embed_layer + transformed_layers = [] + for i in range(transformer_num): + attention_input = transformed + transformed = attention_layer(transformed) + if dropout_rate > 0.0: + transformed = keras.layers.Dropout( + rate=dropout_rate, + name='Attention-Dropout-{}'.format(i + 1), + )(transformed) + transformed = keras.layers.Add( + name='Attention-Add-{}'.format(i + 1), + )([attention_input, transformed]) + transformed = attention_normal(transformed) + + feed_forward_input = transformed + transformed = feed_forward_layer(transformed) + if dropout_rate > 0.0: + transformed = keras.layers.Dropout( + rate=dropout_rate, + name='Feed-Forward-Dropout-{}'.format(i + 1), + )(transformed) + transformed = keras.layers.Add( + name='Feed-Forward-Add-{}'.format(i + 1), + )([feed_forward_input, transformed]) + transformed = feed_forward_normal(transformed) + transformed_layers.append(transformed) + + if training: + # Build tasks + mlm_dense_layer = keras.layers.Dense( + units=hidden_dim, + activation=feed_forward_activation, + name='MLM-Dense', + )(transformed) + mlm_norm_layer = LayerNormalization(name='MLM-Norm')(mlm_dense_layer) + mlm_pred_layer = AdaptiveSoftmax( + input_dim=hidden_dim, + output_dim=token_num, + embed_dim=embed_dim, + bind_embeddings=True, + bind_projections=True, + name='MLM-Sim', + )([mlm_norm_layer, embed_weights, embed_projection]) + masked_layer = Masked(name='MLM')([mlm_pred_layer, inputs[-1]]) + extract_layer = Extract(index=0, name='Extract')(transformed) + nsp_dense_layer = keras.layers.Dense( + units=hidden_dim, + activation='tanh', + name='SOP-Dense', + )(extract_layer) + nsp_pred_layer = keras.layers.Dense( + units=2, + activation='softmax', + name='SOP', + )(nsp_dense_layer) + model = keras.models.Model( + inputs=inputs, + outputs=[masked_layer, nsp_pred_layer]) + for layer in model.layers: + layer.trainable = _trainable(layer) + return model + if output_layers is not None: + if isinstance(output_layers, list): + output_layers = 
[ + transformed_layers[index] for index in output_layers] + output = keras.layers.Concatenate( + name='Output', + )(output_layers) + else: + output = transformed_layers[output_layers] + model = keras.models.Model(inputs=inputs, outputs=output) + return model + model = keras.models.Model(inputs=inputs, outputs=transformed) + for layer in model.layers: + layer.trainable = _trainable(layer) + return inputs, transformed + + +def load_brightmart_albert_zh_checkpoint(checkpoint_path, **kwargs): + """Load checkpoint from https://github.com/brightmart/albert_zh + :param checkpoint_path: path to checkpoint folder. + :param kwargs: arguments for albert model. + :return: + """ + config = {} + for file_name in os.listdir(checkpoint_path): + if file_name.startswith('bert_config.json'): + with open(os.path.join(checkpoint_path, file_name)) as reader: + config = json.load(reader) + break + + def _set_if_not_existed(key, value): + if key not in kwargs: + kwargs[key] = value + + # 修改部分,必须输入is_training, len_max + training = kwargs['training'] + # config['max_position_embeddings'] = config['max_position_embeddings'] = kwargs['len_max'] + _set_if_not_existed('training', True) + _set_if_not_existed('token_num', config['vocab_size']) + _set_if_not_existed('pos_num', config['max_position_embeddings']) + _set_if_not_existed('seq_len', config['max_position_embeddings']) + _set_if_not_existed('embed_dim', config['embedding_size']) + _set_if_not_existed('hidden_dim', config['hidden_size']) + _set_if_not_existed('transformer_num', config['num_hidden_layers']) + _set_if_not_existed('head_num', config['num_attention_heads']) + _set_if_not_existed('feed_forward_dim', config['intermediate_size']) + _set_if_not_existed('dropout_rate', config['hidden_dropout_prob']) + _set_if_not_existed('feed_forward_activation', config['hidden_act']) + + model = build_albert(**kwargs) + if not training: + inputs, outputs = model + model = keras.models.Model(inputs, outputs) + + def 
_checkpoint_loader(checkpoint_file): + def _loader(name): + return tf.train.load_variable(checkpoint_file, name) + return _loader + + loader = _checkpoint_loader( + os.path.join(checkpoint_path, 'bert_model.ckpt')) + + model.get_layer(name='Embed-Token').set_weights([ + loader('bert/embeddings/word_embeddings'), + loader('bert/embeddings/word_embeddings_2'), + ]) + model.get_layer(name='Embed-Segment').set_weights([ + loader('bert/embeddings/token_type_embeddings'), + ]) + model.get_layer(name='Embedding-Position').set_weights([ + loader('bert/embeddings/position_embeddings'), + ]) + model.get_layer(name='Embedding-Norm').set_weights([ + loader('bert/embeddings/LayerNorm/gamma'), + loader('bert/embeddings/LayerNorm/beta'), + ]) + + model.get_layer(name='Attention').set_weights([ + loader('bert/encoder/layer_shared/attention/self/query/kernel'), + loader('bert/encoder/layer_shared/attention/self/query/bias'), + loader('bert/encoder/layer_shared/attention/self/key/kernel'), + loader('bert/encoder/layer_shared/attention/self/key/bias'), + loader('bert/encoder/layer_shared/attention/self/value/kernel'), + loader('bert/encoder/layer_shared/attention/self/value/bias'), + loader('bert/encoder/layer_shared/attention/output/dense/kernel'), + loader('bert/encoder/layer_shared/attention/output/dense/bias'), + ]) + model.get_layer(name='Attention-Normal').set_weights([ + loader('bert/encoder/layer_shared/attention/output/LayerNorm/gamma'), + loader('bert/encoder/layer_shared/attention/output/LayerNorm/beta'), + ]) + model.get_layer(name='Feed-Forward').set_weights([ + loader('bert/encoder/layer_shared/intermediate/dense/kernel'), + loader('bert/encoder/layer_shared/intermediate/dense/bias'), + loader('bert/encoder/layer_shared/output/dense/kernel'), + loader('bert/encoder/layer_shared/output/dense/bias'), + ]) + model.get_layer(name='Feed-Forward-Normal').set_weights([ + loader('bert/encoder/layer_shared/output/LayerNorm/gamma'), + 
loader('bert/encoder/layer_shared/output/LayerNorm/beta'), + ]) + + if training: + model.get_layer(name='MLM-Dense').set_weights([ + loader('cls/predictions/transform/dense/kernel'), + loader('cls/predictions/transform/dense/bias'), + ]) + model.get_layer(name='MLM-Norm').set_weights([ + loader('cls/predictions/transform/LayerNorm/gamma'), + loader('cls/predictions/transform/LayerNorm/beta'), + ]) + model.get_layer(name='MLM-Sim').set_weights([ + loader('cls/predictions/output_bias'), + ]) + + model.get_layer(name='SOP-Dense').set_weights([ + loader('bert/pooler/dense/kernel'), + loader('bert/pooler/dense/bias'), + ]) + model.get_layer(name='SOP').set_weights([ + np.transpose(loader('cls/seq_relationship/output_weights')), + loader('cls/seq_relationship/output_bias'), + ]) + + return model \ No newline at end of file diff --git a/keras_layers/attention_dot.py b/keras_layers/attention_dot.py new file mode 100644 index 0000000..859c659 --- /dev/null +++ b/keras_layers/attention_dot.py @@ -0,0 +1,104 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2020/3/31 19:10 +# @author : Mo +# @function: Attention of dot + + +from keras.regularizers import L1L2 +# from keras.engine.topology import Layer +from keras.layers import Layer +from keras import backend as K +import tensorflow as tf + + +class Attention(Layer): + def __init__(self, **kwargs): + super().__init__(** kwargs) + + def build(self, input_shape): + self.W = self.add_weight(name='Attention_Dot_Weight', + shape=(input_shape[1], input_shape[1]), + regularizer=L1L2(0.0000032), + initializer='uniform', + trainable=True) + self.b = self.add_weight(name='Attention_Dot_Bias', + regularizer=L1L2(0.00032), + shape=(input_shape[1],), + initializer='uniform', + trainable=True) + super().build(input_shape) + + def call(self, input): + x_transpose = K.permute_dimensions(input, (0, 2, 1)) + x_tanh_softmax = K.softmax(K.tanh(K.dot(x_transpose, self.W) + self.b)) + outputs = K.permute_dimensions(x_tanh_softmax * 
x_transpose, (0, 2, 1)) + # outputs = K.sum(outputs, axis=1) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape[0], input_shape[1], input_shape[2] + + +class CVG_Layer(Layer): + def __init__(self, embed_size, filter, label, **kwargs): + self.embed_size = embed_size + self.filter = filter + self.label = label + super().__init__(** kwargs) + + def build(self, input_shape): + self._filter = self.add_weight(name=f'filter_{self.filter}', + shape=(self.filter, self.label, 1, 1), + regularizer=L1L2(0.00032), + initializer='uniform', + trainable=True) + self.class_w = self.add_weight(name='class_w', + shape=(self.label, self.embed_size), + regularizer=L1L2(0.0000032), + initializer='uniform', + trainable=True) + self.b = self.add_weight(name='bias', + shape=(1,), + regularizer=L1L2(0.00032), + initializer='uniform', + trainable=True) + super().build(input_shape) + + def call(self, input): + # C * V / G + # l2_normalize of x, y + input_norm = tf.nn.l2_normalize(input) # b * s * e + class_w_relu = tf.nn.relu(self.class_w) # c * e + label_embedding_reshape = tf.transpose(class_w_relu, [1, 0]) # e * c + label_embedding_reshape_norm = tf.nn.l2_normalize(label_embedding_reshape) # e * c + # C * V + G = tf.contrib.keras.backend.dot(input_norm, label_embedding_reshape_norm) # b * s * c + G_transpose = tf.transpose(G, [0, 2, 1]) # b * c * s + G_expand = tf.expand_dims(G_transpose, axis=-1) # b * c * s * 1 + # text_cnn + conv = tf.nn.conv2d(name='conv', input=G_expand, filter=self._filter, + strides=[1, 1, 1, 1], padding='SAME') + pool = tf.nn.relu(name='relu', features=tf.nn.bias_add(conv, self.b)) # b * c * s * 1 + # pool = tf.nn.max_pool(name='pool', value=h, ksize=[1, int((self.filters[0]-1)/2), 1, 1], + # strides=[1, 1, 1, 1], padding='SAME') + # max_pool + pool_squeeze = tf.squeeze(pool, axis=-1) # b * c * s + pool_squeeze_transpose = tf.transpose(pool_squeeze, [0, 2, 1]) # b * s * c + G_max_squeeze = 
tf.reduce_max(input_tensor=pool_squeeze_transpose, axis=-1, keepdims=True) # b * s * 1 + # divide of softmax + exp_logits = tf.exp(G_max_squeeze) + exp_logits_sum = tf.reduce_sum(exp_logits, axis=1, keepdims=True) + att_v_max = tf.div(exp_logits, exp_logits_sum) + # β * V + x_att = tf.multiply(input, att_v_max) + x_att_sum = tf.reduce_sum(x_att, axis=1) + return x_att_sum + + def compute_output_shape(self, input_shape): + return None, K.int_shape(self.class_w)[1] + + +if __name__=="__main__": + att = AttentionDot() + diff --git a/keras_layers/attention_self.py b/keras_layers/attention_self.py new file mode 100644 index 0000000..c130ff9 --- /dev/null +++ b/keras_layers/attention_self.py @@ -0,0 +1,51 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/22 19:35 +# @author :Mo +# @function :Attention of itself + + +from keras.regularizers import L1L2, Regularizer +# from keras.engine.topology import Layer +from keras.layers import Layer +from keras import backend as K + + +class AttentionSelf(Layer): + """ + self attention, + codes from: https://mp.weixin.qq.com/s/qmJnyFMkXVjYBwoR_AQLVA + """ + def __init__(self, output_dim, **kwargs): + self.output_dim = output_dim + super().__init__(**kwargs) + + def build(self, input_shape): + # W、K and V + self.kernel = self.add_weight(name='WKV', + shape=(3, input_shape[2], self.output_dim), + initializer='uniform', + regularizer=L1L2(0.0000032), + trainable=True) + super().build(input_shape) + + def call(self, x): + WQ = K.dot(x, self.kernel[0]) + WK = K.dot(x, self.kernel[1]) + WV = K.dot(x, self.kernel[2]) + # print("WQ.shape",WQ.shape) + # print("K.permute_dimensions(WK, [0, 2, 1]).shape",K.permute_dimensions(WK, [0, 2, 1]).shape) + QK = K.batch_dot(WQ, K.permute_dimensions(WK, [0, 2, 1])) + QK = QK / (64**0.5) + QK = K.softmax(QK) + # print("QK.shape",QK.shape) + V = K.batch_dot(QK, WV) + return V + + def compute_output_shape(self, input_shape): + return (input_shape[0], input_shape[1], self.output_dim) + + +if 
__name__=="__main__": + att = AttentionSelf(300) + diff --git a/keras_layers/capsule.py b/keras_layers/capsule.py new file mode 100644 index 0000000..c54ad3a --- /dev/null +++ b/keras_layers/capsule.py @@ -0,0 +1,287 @@ +""" +Some key layers used for constructing a Capsule Network. These layers can used to construct CapsNet on other dataset, +not just on MNIST. +*NOTE*: some functions can be implemented in multiple ways, I keep all of them. You can try them for yourself just by +uncommenting them and commenting their counterparts. + +Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras` +""" + + +from keras.layers import Activation, Layer +from keras import initializers, layers +import keras.backend as K +import tensorflow as tf + + +class Length(layers.Layer): + """ + Compute the length of vectors. This is used to compute a Tensor that has the same shape with y_true in margin_loss. + Using this layer as model's output can directly predict labels by using `y_pred = np.argmax(model.predict(x), 1)` + inputs: shape=[None, num_vectors, dim_vector] + output: shape=[None, num_vectors] + """ + def call(self, inputs, **kwargs): + return K.sqrt(K.sum(K.square(inputs), -1) + K.epsilon()) + + def compute_output_shape(self, input_shape): + return input_shape[:-1] + + def get_config(self): + config = super(Length, self).get_config() + return config + + +class Mask(layers.Layer): + """ + Mask a Tensor with shape=[None, num_capsule, dim_vector] either by the capsule with max length or by an additional + input mask. Except the max-length capsule (or specified capsule), all vectors are masked to zeros. Then flatten the + masked Tensor. + For example: + ``` + x = keras.layers.Input(shape=[8, 3, 2]) # batch_size=8, each sample contains 3 capsules with dim_vector=2 + y = keras.layers.Input(shape=[8, 3]) # True labels. 8 samples, 3 classes, one-hot coding. 
+ out = Mask()(x) # out.shape=[8, 6] + # or + out2 = Mask()([x, y]) # out2.shape=[8,6]. Masked with true labels y. Of course y can also be manipulated. + ``` + """ + def call(self, inputs, **kwargs): + if type(inputs) is list: # true label is provided with shape = [None, n_classes], i.e. one-hot code. + assert len(inputs) == 2 + inputs, mask = inputs + else: # if no true label, mask by the max length of capsules. Mainly used for prediction + # compute lengths of capsules + x = K.sqrt(K.sum(K.square(inputs), -1)) + # generate the mask which is a one-hot code. + # mask.shape=[None, n_classes]=[None, num_capsule] + mask = K.one_hot(indices=K.argmax(x, 1), num_classes=x.get_shape().as_list()[1]) + + # inputs.shape=[None, num_capsule, dim_capsule] + # mask.shape=[None, num_capsule] + # masked.shape=[None, num_capsule * dim_capsule] + masked = K.batch_flatten(inputs * K.expand_dims(mask, -1)) + return masked + + def compute_output_shape(self, input_shape): + if type(input_shape[0]) is tuple: # true label provided + return tuple([None, input_shape[0][1] * input_shape[0][2]]) + else: # no true label provided + return tuple([None, input_shape[1] * input_shape[2]]) + + def get_config(self): + config = super(Mask, self).get_config() + return config + + +def squash(vectors, axis=-1): + """ + The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0 + :param vectors: some vectors to be squashed, N-dim tensor + :param axis: the axis to squash + :return: a Tensor with same shape as input vectors + """ + s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True) + scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon()) + return scale * vectors + + +class CapsuleLayer(layers.Layer): + """ + The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the + neuron from the former layer, and it has `out_num` output neurons. 
CapsuleLayer just expand the output of the neuron + from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_capsule] and output shape = \ + [None, num_capsule, dim_capsule]. For Dense Layer, input_dim_capsule = dim_capsule = 1. + + :param num_capsule: number of capsules in this layer + :param dim_capsule: dimension of the output vectors of the capsules in this layer + :param routings: number of iterations for the routing algorithm + """ + def __init__(self, num_capsule, dim_capsule, routings=3, + kernel_initializer='glorot_uniform', + **kwargs): + super(CapsuleLayer, self).__init__(**kwargs) + self.num_capsule = num_capsule + self.dim_capsule = dim_capsule + self.routings = routings + self.kernel_initializer = initializers.get(kernel_initializer) + + def build(self, input_shape): + assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]" + self.input_num_capsule = input_shape[1] + self.input_dim_capsule = input_shape[2] + + # Transform matrix + self.W = self.add_weight(shape=[self.num_capsule, self.input_num_capsule, + self.dim_capsule, self.input_dim_capsule], + initializer=self.kernel_initializer, + name='W') + + self.built = True + + def call(self, inputs, training=None): + # inputs.shape=[None, input_num_capsule, input_dim_capsule] + # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule] + inputs_expand = K.expand_dims(inputs, 1) + + # Replicate num_capsule dimension to prepare being multiplied by W + # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule] + inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1]) + + # Compute `inputs * W` by scanning inputs_tiled on dimension 0. 
+ # x.shape=[num_capsule, input_num_capsule, input_dim_capsule] + # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule] + # Regard the first two dimensions as `batch` dimension, + # then matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule]. + # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule] + inputs_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, [2, 3]), elems=inputs_tiled) + + # Begin: Routing algorithm ---------------------------------------------------------------------# + # The prior for coupling coefficient, initialized as zeros. + # b.shape = [None, self.num_capsule, self.input_num_capsule]. + b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule]) + + assert self.routings > 0, 'The routings should be > 0.' + for i in range(self.routings): + # c.shape=[batch_size, num_capsule, input_num_capsule] + c = tf.nn.softmax(b, dim=1) + + # c.shape = [batch_size, num_capsule, input_num_capsule] + # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule] + # The first two dimensions as `batch` dimension, + # then matmal: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule]. + # outputs.shape=[None, num_capsule, dim_capsule] + outputs = squash(K.batch_dot(c, inputs_hat, [2, 2])) # [None, 10, 16] + + if i < self.routings - 1: + # outputs.shape = [None, num_capsule, dim_capsule] + # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule] + # The first two dimensions as `batch` dimension, + # then matmal: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule]. 
+ # b.shape=[batch_size, num_capsule, input_num_capsule] + b += K.batch_dot(outputs, inputs_hat, [2, 3]) + # End: Routing algorithm -----------------------------------------------------------------------# + + return outputs + + def compute_output_shape(self, input_shape): + return tuple([None, self.num_capsule, self.dim_capsule]) + + def get_config(self): + config = { + 'num_capsule': self.num_capsule, + 'dim_capsule': self.dim_capsule, + 'routings': self.routings + } + base_config = super(CapsuleLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding): + """ + Apply Conv2D `n_channels` times and concatenate all capsules + :param inputs: 4D tensor, shape=[None, width, height, channels] + :param dim_capsule: the dim of the output vector of capsule + :param n_channels: the number of types of capsules + :return: output tensor, shape=[None, num_capsule, dim_capsule] + """ + output = layers.Conv2D(filters=dim_capsule*n_channels, + kernel_size=kernel_size, + strides=strides, + padding=padding,)(inputs) + outputs = layers.Reshape(target_shape=[-1, dim_capsule], )(output) + + return layers.Lambda(squash, )(outputs) + + +def PrimaryCap_nchannels(inputs, dim_capsule, n_channels, kernel_size, strides, padding): + # The following is another way to implement primary capsule layer. This is much slower. 
+ # Apply Conv2D `n_channels` times and concatenate all capsules + outputs = [] + for _ in range(n_channels): + output = layers.Conv2D(filters=dim_capsule, kernel_size=kernel_size, strides=strides, padding=padding)(inputs) + outputs.append(layers.Reshape([output.get_shape().as_list()[1] ** 2, dim_capsule])(output)) + outputs = layers.Concatenate(axis=1)(outputs) + return layers.Lambda(squash)(outputs) + + + + +def squash_bojone(x, axis=-1): + """ + activation of squash + :param x: vector + :param axis: int + :return: vector + """ + s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + scale = K.sqrt(s_squared_norm + K.epsilon()) + return x / scale + + +class Capsule_bojone(Layer): + """ + # auther: bojone + # explain: A Capsule Implement with Pure Keras + # github: https://github.com/bojone/Capsule/blob/master/Capsule_Keras.py + """ + def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), + share_weights=True, activation='default', **kwargs): + super(Capsule_bojone, self).__init__(**kwargs) + self.num_capsule = num_capsule + self.dim_capsule = dim_capsule + self.routings = routings + self.kernel_size = kernel_size + self.share_weights = share_weights + if activation == 'default': + self.activation = squash_bojone + else: + self.activation = Activation(activation) + + def build(self, input_shape): + super(Capsule_bojone, self).build(input_shape) + input_dim_capsule = input_shape[-1] + if self.share_weights: + self.W = self.add_weight(name='capsule_kernel', + shape=(1, input_dim_capsule, + self.num_capsule * self.dim_capsule), + # shape=self.kernel_size, + initializer='glorot_uniform', + trainable=True) + else: + input_num_capsule = input_shape[-2] + self.W = self.add_weight(name='capsule_kernel', + shape=(input_num_capsule, + input_dim_capsule, + self.num_capsule * self.dim_capsule), + initializer='glorot_uniform', + trainable=True) + + def call(self, u_vecs): + if self.share_weights: + u_hat_vecs = K.conv1d(u_vecs, self.W) + else: + 
u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1]) + + batch_size = K.shape(u_vecs)[0] + input_num_capsule = K.shape(u_vecs)[1] + u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule, + self.num_capsule, self.dim_capsule)) + u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3)) + # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule] + + b = K.zeros_like(u_hat_vecs[:, :, :, 0]) # shape = [None, num_capsule, input_num_capsule] + outputs = None + for i in range(self.routings): + b = K.permute_dimensions(b, (0, 2, 1)) # shape = [None, input_num_capsule, num_capsule] + c = K.softmax(b) + c = K.permute_dimensions(c, (0, 2, 1)) + b = K.permute_dimensions(b, (0, 2, 1)) + outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2])) + if i < self.routings - 1: + b = K.batch_dot(outputs, u_hat_vecs, [2, 3]) + + return outputs + + def compute_output_shape(self, input_shape): + return (None, self.num_capsule, self.dim_capsule) \ No newline at end of file diff --git a/keras_layers/highway.py b/keras_layers/highway.py new file mode 100644 index 0000000..ba2442f --- /dev/null +++ b/keras_layers/highway.py @@ -0,0 +1,61 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/22 8:15 +# @author :Mo +# @function : + + +from keras.layers import Layer, Dense +import keras + + +class highway(Layer): + """ + # writter by my own + # paper; Highway Network(http://arxiv.org/abs/1505.00387). + # 公式 + # 1. s = sigmoid(Wx + b) + # 2. z = s * relu(Wx + b) + (1 - s) * x + # x shape : [N * time_depth, sum(filters)] + + # Table 1. CIFAR-10 test set accuracy of convolutional highway networks with + # rectified linear activation and sigmoid gates. + # For comparison, results reported by Romero et al. (2014) + # using maxout networks are also shown. + # Fitnets were trained using a two step training procedure using soft targets from the trained Teacher network, + # which was trained using backpropagation. 
We trained all highway networks directly using backpropagation. + # * indicates networks which were trained only on a set of 40K out of 50K examples in the training set. + + + + # Figure 2. Visualization of certain internals of the blocks in the best 50 hidden layer highway networks trained on MNIST + # (top row) and CIFAR-100 (bottom row). The first hidden layer is a plain layer which changes the dimensionality of the representation to 50. Each of + # the 49 highway layers (y-axis) consists of 50 blocks (x-axis). + # The first column shows the transform gate biases, which were initialized to -2 and -4 respectively. + # In the second column the mean output of the transform gate over 10,000 training examples is depicted. + # The third and forth columns show the output of the transform gates and + # the block outputs for a single random training sample. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def build(self, input_shape): + super().build(input_shape) + + def call(self, x): + gate_transform = Dense(units=K.int_shape(x)[1], + activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer=keras.initializers.Constant(value=-2))(x) + gate_cross = 1 - gate_transform + block_state = Dense(units=K.int_shape(x)[1], + activation='relu', + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zero')(x) + high_way = gate_transform * block_state + gate_cross * x + return high_way + + def compute_output_shape(self, input_shape): + return input_shape[0], input_shape[1], input_shape[-1] diff --git a/keras_layers/k_max_pooling.py b/keras_layers/k_max_pooling.py new file mode 100644 index 0000000..5bc3c87 --- /dev/null +++ b/keras_layers/k_max_pooling.py @@ -0,0 +1,36 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/22 7:34 +# @author :Mo +# @function : + + +from keras.layers import Layer +import tensorflow as tf + + +class k_max_pooling(Layer): + """ + paper: 
http://www.aclweb.org/anthology/P14-1062 + paper title: A Convolutional Neural Network for Modelling Sentences + Reference: https://stackoverflow.com/questions/51299181/how-to-implement-k-max-pooling-in-tensorflow-or-keras + 动态K-max pooling + k的选择为 k = max(k, s * (L-1) / L) + 其中k为预先选定的设置的最大的K个值,s为文本最大长度,L为第几个卷积层的深度(单个卷积到连接层等) + github tf实现可以参考: https://github.com/lpty/classifier/blob/master/a04_dcnn/model.py + """ + def __init__(self, top_k=8, **kwargs): + self.top_k = top_k + super().__init__(**kwargs) + + def build(self, input_shape): + super().build(input_shape) + + def call(self, inputs): + inputs_reshape = tf.transpose(inputs, perm=[0, 2, 1]) + pool_top_k = tf.nn.top_k(input=inputs_reshape, k=self.top_k, sorted=False).values + pool_top_k_reshape = tf.transpose(pool_top_k, perm=[0, 2, 1]) + return pool_top_k_reshape + + def compute_output_shape(self, input_shape): + return input_shape[0], self.top_k, input_shape[-1] diff --git a/keras_layers/keras_lookahead.py b/keras_layers/keras_lookahead.py new file mode 100644 index 0000000..752a554 --- /dev/null +++ b/keras_layers/keras_lookahead.py @@ -0,0 +1,77 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2019/11/12 16:14 +# @author : Mo +# @function: lookahead of keras +# @codefrom: https://github.com/bojone/keras_lookahead + + +from keras import backend as K + + +class Lookahead(object): + """Add the [Lookahead Optimizer](https://arxiv.org/abs/1907.08610) functionality for [keras](https://keras.io/). + """ + + def __init__(self, k=5, alpha=0.5): + self.k = k + self.alpha = alpha + self.count = 0 + + def inject(self, model): + """Inject the Lookahead algorithm for the given model. + The following code is modified from keras's _make_train_function method. 
+ See: https://github.com/keras-team/keras/blob/master/keras/engine/training.py#L497 + """ + if not hasattr(model, 'train_function'): + raise RuntimeError('You must compile your model before using it.') + + model._check_trainable_weights_consistency() + + if model.train_function is None: + inputs = (model._feed_inputs + + model._feed_targets + + model._feed_sample_weights) + if model._uses_dynamic_learning_phase(): + inputs += [K.learning_phase()] + fast_params = model._collected_trainable_weights + + with K.name_scope('training'): + with K.name_scope(model.optimizer.__class__.__name__): + training_updates = model.optimizer.get_updates( + params=fast_params, + loss=model.total_loss) + slow_params = [K.variable(p) for p in fast_params] + fast_updates = (model.updates + + training_updates + + model.metrics_updates) + + slow_updates, copy_updates = [], [] + for p, q in zip(fast_params, slow_params): + slow_updates.append(K.update(q, q + self.alpha * (p - q))) + copy_updates.append(K.update(p, q)) + + # Gets loss and metrics. Updates weights at each call. 
+ fast_train_function = K.function( + inputs, + [model.total_loss] + model.metrics_tensors, + updates=fast_updates, + name='fast_train_function', + **model._function_kwargs) + + def F(inputs): + self.count += 1 + R = fast_train_function(inputs) + if self.count % self.k == 0: + K.batch_get_value(slow_updates) + K.batch_get_value(copy_updates) + return R + + model.train_function = F + +if __name__ == '__main__': + gg = 0 + # useage + # model.compile(optimizer=Adam(1e-3), loss='mse') # Any optimizer + # lookahead = Lookahead(k=5, alpha=0.5) # Initialize Lookahead + # lookahead.inject(model) # add into model diff --git a/keras_layers/keras_radam.py b/keras_layers/keras_radam.py new file mode 100644 index 0000000..98f7879 --- /dev/null +++ b/keras_layers/keras_radam.py @@ -0,0 +1,96 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2019/11/12 16:12 +# @author : Mo +# @function: radam of keras +# @codefrom: https://github.com/bojone/keras_radam + + +from keras.legacy import interfaces +from keras.optimizers import Optimizer +import keras.backend as K + + +class RAdam(Optimizer): + """RAdam optimizer. + Default parameters follow those provided in the original Adam paper. + # Arguments + lr: float >= 0. Learning rate. + beta_1: float, 0 < beta < 1. Generally close to 1. + beta_2: float, 0 < beta < 1. Generally close to 1. + epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. + decay: float >= 0. Learning rate decay over each update. + amsgrad: boolean. Whether to apply the AMSGrad variant of this + algorithm from the paper "On the Convergence of Adam and + Beyond". 
+ # References + - [RAdam - A Method for Stochastic Optimization] + (https://arxiv.org/abs/1908.03265) + - [On The Variance Of The Adaptive Learning Rate And Beyond] + (https://arxiv.org/abs/1908.03265) + """ + + def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, + epsilon=None, decay=0., **kwargs): + super(RAdam, self).__init__(**kwargs) + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') + self.lr = K.variable(lr, name='lr') + self.beta_1 = K.variable(beta_1, name='beta_1') + self.beta_2 = K.variable(beta_2, name='beta_2') + self.decay = K.variable(decay, name='decay') + if epsilon is None: + epsilon = K.epsilon() + self.epsilon = epsilon + self.initial_decay = decay + + @interfaces.legacy_get_updates_support + def get_updates(self, loss, params): + grads = self.get_gradients(loss, params) + self.updates = [K.update_add(self.iterations, 1)] + + lr = self.lr + if self.initial_decay > 0: + lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) + + t = K.cast(self.iterations, K.floatx()) + 1 + beta_1_t = K.pow(self.beta_1, t) + beta_2_t = K.pow(self.beta_2, t) + rho = 2 / (1 - self.beta_2) - 1 + rho_t = rho - 2 * t * beta_2_t / (1 - beta_2_t) + r_t = K.sqrt( + K.relu(rho_t - 4) * K.relu(rho_t - 2) * rho / ((rho - 4) * (rho - 2) * rho_t) + ) + flag = K.cast(rho_t > 4, K.floatx()) + + ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + self.weights = [self.iterations] + ms + vs + + for p, g, m, v in zip(params, grads, ms, vs): + m_t = (self.beta_1 * m) + (1. - self.beta_1) * g + v_t = (self.beta_2 * v) + (1. 
- self.beta_2) * K.square(g) + mhat_t = m_t / (1 - beta_1_t) + vhat_t = K.sqrt(v_t / (1 - beta_2_t)) + p_t = p - lr * mhat_t * (flag * r_t / (vhat_t + self.epsilon) + (1 - flag)) + + self.updates.append(K.update(m, m_t)) + self.updates.append(K.update(v, v_t)) + new_p = p_t + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(K.update(p, new_p)) + return self.updates + + def get_config(self): + config = {'lr': float(K.get_value(self.lr)), + 'beta_1': float(K.get_value(self.beta_1)), + 'beta_2': float(K.get_value(self.beta_2)), + 'decay': float(K.get_value(self.decay)), + 'epsilon': self.epsilon} + base_config = super(RAdam, self).get_config() + return dict(list(base_config.items()) + list(config.items())) \ No newline at end of file diff --git a/keras_layers/non_mask_layer.py b/keras_layers/non_mask_layer.py new file mode 100644 index 0000000..edf89f4 --- /dev/null +++ b/keras_layers/non_mask_layer.py @@ -0,0 +1,32 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/10 10:35 +# @author :Mo +# @function : + +from __future__ import print_function, division +from keras.engine import Layer + + +class NonMaskingLayer(Layer): + """ + fix convolutional 1D can't receive masked input, detail: https://github.com/keras-team/keras/issues/4978 + thanks for https://github.com/jacoxu + """ + + def __init__(self, **kwargs): + self.supports_masking = True + super(NonMaskingLayer, self).__init__(**kwargs) + + def build(self, input_shape): + pass + + def compute_mask(self, input, input_mask=None): + # do not pass the mask to the next layers + return None + + def call(self, x, mask=None): + return x + + def compute_output_shape(self, input_shape): + return input_shape \ No newline at end of file diff --git a/keras_layers/transformer.py b/keras_layers/transformer.py new file mode 100644 index 0000000..aa01eff --- /dev/null +++ b/keras_layers/transformer.py @@ -0,0 +1,577 @@ +# -*- coding: UTF-8 -*- 
+# !/usr/bin/python +# @time :2019/7/22 10:55 +# @author :Mo +# @function : + +from keras_layers.transformer_utils.triangle_position_embedding import TriglePositiomEmbedding +from keras_layers.transformer_utils.multi_head_attention import MultiHeadAttention +from keras_layers.transformer_utils.layer_normalization import LayerNormalization +from keras_layers.transformer_utils.embedding import EmbeddingRet, EmbeddingSim +from keras_layers.transformer_utils.feedforward import FeedForward +import numpy as np +import keras + + +def _common_wrap_layer(name, + input_layer, + build_func, + dropout_rate=0.0, + trainable=True, + use_adapter=False, + use_star=False, + adapter_units=None, + adapter_activation='relu'): + """Wrap layers with residual, normalization and dropout. + + :param name: Prefix of names for internal layers. + :param input_layer: Input layer. + :param build_func: A callable that takes the input tensor and generates the output tensor. + :param dropout_rate: Dropout rate. + :param trainable: Whether the layers are trainable. + :param use_star:Whether to use star-transformer. + :param use_adapter: Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. + :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Output layer. 
+ """ + build_output = build_func(input_layer) + if dropout_rate > 0.0: + dropout_layer = keras.layers.Dropout(rate=dropout_rate, + name='%s-Dropout' % name,)(build_output) + else: + dropout_layer = build_output + if isinstance(input_layer, list): + input_layer = input_layer[0] + if use_adapter: # 使用 activation + adapter = FeedForward(units=adapter_units, + activation=adapter_activation, + kernel_initializer=keras.initializers.TruncatedNormal(mean=0.0, stddev=0.001), + name='%s-Adapter' % name,)(dropout_layer) + if use_star: # 使用star-transformer, 就不用residual + dropout_layer = adapter + else: + dropout_layer = keras.layers.Add(name='%s-Adapter-Add' % name)([dropout_layer, adapter]) + if use_star: # 使用star-transformer + add_layer = keras.layers.Activation(adapter_activation)(dropout_layer) + else: + add_layer = keras.layers.Add(name='%s-Add' % name)([input_layer, dropout_layer]) + normal_layer = LayerNormalization(trainable=trainable, + name='%s-Norm' % name,)(add_layer) + return normal_layer + + +def build_attention(name, + head_num, + activation, + history_only, + trainable=True): + """Get multi-head self-attention builder. + + :param name: Prefix of names for internal layers. + :param head_num: Number of heads in multi-head self-attention. + :param activation: Activation for multi-head self-attention. + :param history_only: Only use history data. + :param trainable: Whether the layer is trainable. + :return: + """ + + def _build_attention(x): + return MultiHeadAttention(head_num=head_num, + activation=activation, + history_only=history_only, + trainable=trainable, + name=name,)(x) + + return _build_attention + + +def build_feed_forward(name, + hidden_dim, + activation, + trainable=True): + """Get position-wise feed-forward layer builder. + + :param name: Prefix of names for internal layers. + :param hidden_dim: Hidden dimension of feed forward layer. + :param activation: Activation for feed-forward layer. + :param trainable: Whether the layer is trainable. 
+ :return: + """ + + def _build_feed_forward(x): + return FeedForward( units=hidden_dim, + activation=activation, + trainable=trainable, + name=name,)(x) + + return _build_feed_forward + + +def get_encoder_layers(name, + input_layer, + head_num, + hidden_dim, + attention_activation=None, + feed_forward_activation='relu', + dropout_rate=0.0, + trainable=True, + use_star=False, + use_adapter=False, + adapter_units=None, + adapter_activation='relu'): + """Multi-head self-attention and feed-forward layer. + + :param name: Prefix of names for internal layers. + :param input_layer: Input layer. + :param head_num: Number of heads in multi-head self-attention. + :param hidden_dim: Hidden dimension of feed forward layer. + :param attention_activation: Activation for multi-head self-attention. + :param feed_forward_activation: Activation for feed-forward layer. + :param dropout_rate: Dropout rate. + :param trainable: Whether the layers are trainable. + :param use_star:Whether to use star-transformer + :param use_adapter: if use star-transformer, use_adapter=True. Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. + :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Output layer. 
+ """ + if use_star: + attention_name_1 = '%s-1-MultiHeadSelfAttention' % name + attention_name_2 = '%s-2-MultiHeadSelfAttention' % name + # (batch_size, seq_len, d_model) = keras.backend.int_shape(input_layer) + # h_extand = keras.backend.zeros((batch_size, seq_len + 2, d_model), dtype=keras.backend.floatx()) + # h_extand[:, 1:seq_len + 1, :] = input_layer # head and tail padding(not cycle) + # input_layer = input_layer.reshape([batch_size, 1, d_model]) + # s_expand = input_layer.expand([batch_size, seq_len, d_model]) + # context = keras.backend.concatenate((h_extand[:, 0:seq_len, :], + # h_extand[:, 1:seq_len + 1, :], + # h_extand[:, 2:seq_len + 2, :], + # input_layer, + # s_expand), 2) + # context = context.reshape([batch_size * seq_len, 5, d_model]) + # h = input_layer.reshape([batch_size * seq_len, 1, d_model]) + # + # h, _ = self.slf_attn_satellite( + + attention_layer = _common_wrap_layer(name=attention_name_1, + input_layer=input_layer, + build_func=build_attention(name=attention_name_1, + head_num=head_num, + activation=attention_activation, + history_only=False, + trainable=trainable, + ), + dropout_rate=dropout_rate, + trainable=trainable, + use_star=use_star, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation,) + feed_forward_layer = _common_wrap_layer(name=attention_name_2, + input_layer=attention_layer, + build_func=build_attention(name=attention_name_2, + head_num=head_num, + activation=attention_activation, + history_only=False, + trainable=trainable, + ), + dropout_rate=dropout_rate, + trainable=trainable, + use_star=use_star, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation,) + else: + attention_name = '%s-MultiHeadSelfAttention' % name + feed_forward_name = '%s-FeedForward' % name + attention_layer = _common_wrap_layer(name=attention_name, + input_layer=input_layer, + build_func=build_attention(name=attention_name, + head_num=head_num, + 
activation=attention_activation, + history_only=False, + trainable=trainable, + ), + dropout_rate=dropout_rate, + trainable=trainable, + use_star=use_star, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation,) + feed_forward_layer = _common_wrap_layer(name=feed_forward_name, + input_layer=attention_layer, + build_func=build_feed_forward(name=feed_forward_name, + hidden_dim=hidden_dim, + activation=feed_forward_activation, + trainable=trainable,), + dropout_rate=dropout_rate, + trainable=trainable, + use_star=use_star, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation,) + return feed_forward_layer + + +def get_decoder_layers(name, + input_layer, + encoded_layer, + head_num, + hidden_dim, + attention_activation=None, + feed_forward_activation='relu', + dropout_rate=0.0, + trainable=True, + use_adapter=False, + adapter_units=None, + adapter_activation='relu'): + """Multi-head self-attention, multi-head query attention and feed-forward layer. + + :param name: Prefix of names for internal layers. + :param input_layer: Input layer. + :param encoded_layer: Encoded layer from encoder. + :param head_num: Number of heads in multi-head self-attention. + :param hidden_dim: Hidden dimension of feed forward layer. + :param attention_activation: Activation for multi-head self-attention. + :param feed_forward_activation: Activation for feed-forward layer. + :param dropout_rate: Dropout rate. + :param trainable: Whether the layers are trainable. + :param use_adapter: Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. + :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Output layer. 
+ """ + self_attention_name = '%s-MultiHeadSelfAttention' % name + query_attention_name = '%s-MultiHeadQueryAttention' % name + feed_forward_name = '%s-FeedForward' % name + self_attention_layer = _common_wrap_layer(name=self_attention_name, + input_layer=input_layer, + build_func=build_attention(name=self_attention_name, + head_num=head_num, + activation=attention_activation, + history_only=True, + trainable=trainable, ), + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, + ) + query_attention_layer = _common_wrap_layer(name=query_attention_name, + input_layer=[self_attention_layer, encoded_layer, encoded_layer], + build_func=build_attention(name=query_attention_name, + head_num=head_num, + activation=attention_activation, + history_only=False, + trainable=trainable, + ), + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, + ) + feed_forward_layer = _common_wrap_layer(name=feed_forward_name, + input_layer=query_attention_layer, + build_func=build_feed_forward(name=feed_forward_name, + hidden_dim=hidden_dim, + activation=feed_forward_activation, + trainable=trainable, + ), + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, + ) + return feed_forward_layer + + +def build_encoders(encoder_num, + input_layer, + head_num, + hidden_dim, + attention_activation=None, + feed_forward_activation='relu', + dropout_rate=0.0, + trainable=True, + use_adapter=False, + adapter_units=None, + adapter_activation='relu'): + """Get encoders. + + :param encoder_num: Number of encoder components. + :param input_layer: Input layer. + :param head_num: Number of heads in multi-head self-attention. + :param hidden_dim: Hidden dimension of feed forward layer. 
+ :param attention_activation: Activation for multi-head self-attention. + :param feed_forward_activation: Activation for feed-forward layer. + :param dropout_rate: Dropout rate. + :param trainable: Whether the layers are trainable. + :param use_adapter: Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. + :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Output layer. + """ + last_layer = input_layer + for i in range(encoder_num): + last_layer = get_encoder_layers(name='Encoder-%d' % (i + 1), + input_layer=last_layer, + head_num=head_num, + hidden_dim=hidden_dim, + attention_activation=attention_activation, + feed_forward_activation=feed_forward_activation, + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, ) + return last_layer + + +def build_decoders(decoder_num, + input_layer, + encoded_layer, + head_num, + hidden_dim, + attention_activation=None, + feed_forward_activation='relu', + dropout_rate=0.0, + trainable=True, + use_adapter=False, + adapter_units=None, + adapter_activation='relu'): + """Get decoders. + + :param decoder_num: Number of decoder components. + :param input_layer: Input layer. + :param encoded_layer: Encoded layer from encoder. + :param head_num: Number of heads in multi-head self-attention. + :param hidden_dim: Hidden dimension of feed forward layer. + :param attention_activation: Activation for multi-head self-attention. + :param feed_forward_activation: Activation for feed-forward layer. + :param dropout_rate: Dropout rate. + :param trainable: Whether the layers are trainable. + :param use_adapter: Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. 
+ :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Output layer. + """ + last_layer = input_layer + for i in range(decoder_num): + last_layer = get_decoder_layers(name='Decoder-%d' % (i + 1), + input_layer=last_layer, + encoded_layer=encoded_layer, + head_num=head_num, + hidden_dim=hidden_dim, + attention_activation=attention_activation, + feed_forward_activation=feed_forward_activation, + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, ) + return last_layer + + +def build_transformer_model(token_num, + embed_dim, + encoder_num, + decoder_num, + head_num, + hidden_dim, + attention_activation=None, + feed_forward_activation='relu', + dropout_rate=0.0, + use_same_embed=True, + embed_weights=None, + embed_trainable=None, + trainable=True, + use_adapter=False, + adapter_units=None, + adapter_activation='relu'): + """Get full model without compilation. + + :param token_num: Number of distinct tokens. + :param embed_dim: Dimension of token embedding. + :param encoder_num: Number of encoder components. + :param decoder_num: Number of decoder components. + :param head_num: Number of heads in multi-head self-attention. + :param hidden_dim: Hidden dimension of feed forward layer. + :param attention_activation: Activation for multi-head self-attention. + :param feed_forward_activation: Activation for feed-forward layer. + :param dropout_rate: Dropout rate. + :param use_same_embed: Whether to use the same token embedding layer. `token_num`, `embed_weights` and + `embed_trainable` should be lists of two elements if it is False. + :param embed_weights: Initial weights of token embedding. + :param embed_trainable: Whether the token embedding is trainable. It will automatically set to False if the given + value is None when embedding weights has been provided. + :param trainable: Whether the layers are trainable. 
+ :param use_adapter: Whether to use feed-forward adapters before each residual connections. + :param adapter_units: The dimension of the first transformation in feed-forward adapter. + :param adapter_activation: The activation after the first transformation in feed-forward adapter. + :return: Keras model. + """ + if not isinstance(token_num, list): + token_num = [token_num, token_num] + encoder_token_num, decoder_token_num = token_num + + if not isinstance(embed_weights, list): + embed_weights = [embed_weights, embed_weights] + encoder_embed_weights, decoder_embed_weights = embed_weights + if encoder_embed_weights is not None: + encoder_embed_weights = [encoder_embed_weights] + if decoder_embed_weights is not None: + decoder_embed_weights = [decoder_embed_weights] + + if not isinstance(embed_trainable, list): + embed_trainable = [embed_trainable, embed_trainable] + encoder_embed_trainable, decoder_embed_trainable = embed_trainable + if encoder_embed_trainable is None: + encoder_embed_trainable = encoder_embed_weights is None + if decoder_embed_trainable is None: + decoder_embed_trainable = decoder_embed_weights is None + + if use_same_embed: + encoder_embed_layer = decoder_embed_layer = EmbeddingRet(input_dim=encoder_token_num, + output_dim=embed_dim, + mask_zero=True, + weights=encoder_embed_weights, + trainable=encoder_embed_trainable, + name='Token-Embedding', + ) + else: + encoder_embed_layer = EmbeddingRet(input_dim=encoder_token_num, + output_dim=embed_dim, + mask_zero=True, + weights=encoder_embed_weights, + trainable=encoder_embed_trainable, + name='Encoder-Token-Embedding', + ) + decoder_embed_layer = EmbeddingRet(input_dim=decoder_token_num, + output_dim=embed_dim, + mask_zero=True, + weights=decoder_embed_weights, + trainable=decoder_embed_trainable, + name='Decoder-Token-Embedding', + ) + encoder_input = keras.layers.Input(shape=(None,), name='Encoder-Input') + encoder_embed = TriglePositiomEmbedding(mode=TriglePositiomEmbedding.MODE_ADD, + 
name='Encoder-Embedding', )(encoder_embed_layer(encoder_input)[0]) + encoded_layer = build_encoders(encoder_num=encoder_num, + input_layer=encoder_embed, + head_num=head_num, + hidden_dim=hidden_dim, + attention_activation=attention_activation, + feed_forward_activation=feed_forward_activation, + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, ) + decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input') + decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input) + decoder_embed = TriglePositiomEmbedding(mode=TriglePositiomEmbedding.MODE_ADD, + name='Decoder-Embedding', )(decoder_embed) + decoded_layer = build_decoders(decoder_num=decoder_num, + input_layer=decoder_embed, + encoded_layer=encoded_layer, + head_num=head_num, + hidden_dim=hidden_dim, + attention_activation=attention_activation, + feed_forward_activation=feed_forward_activation, + dropout_rate=dropout_rate, + trainable=trainable, + use_adapter=use_adapter, + adapter_units=adapter_units, + adapter_activation=adapter_activation, ) + dense_layer = EmbeddingSim(trainable=trainable, + name='Output', )([decoded_layer, decoder_embed_weights]) + return keras.models.Model(inputs=[encoder_input, decoder_input], outputs=dense_layer) + + +def get_max_suffix_repeat_times(tokens, max_len): + detect_len = min(max_len, len(tokens)) + next = [-1] * detect_len + k = -1 + for i in range(1, detect_len): + while k >= 0 and tokens[len(tokens) - i - 1] != tokens[len(tokens) - k - 2]: + k = next[k] + if tokens[len(tokens) - i - 1] == tokens[len(tokens) - k - 2]: + k += 1 + next[i] = k + max_repeat = 1 + for i in range(2, detect_len): + if next[i] >= 0 and (i + 1) % (i - next[i]) == 0: + max_repeat = max(max_repeat, (i + 1) // (i - next[i])) + return max_repeat + + +def decode(model, + tokens, + start_token, + end_token, + pad_token, + top_k=1, + temperature=1.0, + max_len=10000, + max_repeat=10, + 
max_repeat_block=10): + """Decode with the given model and input tokens. + + :param model: The trained model. + :param tokens: The input tokens of encoder. + :param start_token: The token that represents the start of a sentence. + :param end_token: The token that represents the end of a sentence. + :param pad_token: The token that represents padding. + :param top_k: Choose the last token from top K. + :param temperature: Randomness in boltzmann distribution. + :param max_len: Maximum length of decoded list. + :param max_repeat: Maximum number of repeating blocks. + :param max_repeat_block: Maximum length of the repeating block. + :return: Decoded tokens. + """ + is_single = not isinstance(tokens[0], list) + if is_single: + tokens = [tokens] + batch_size = len(tokens) + decoder_inputs = [[start_token] for _ in range(batch_size)] + outputs = [None for _ in range(batch_size)] + output_len = 1 + while len(list(filter(lambda x: x is None, outputs))) > 0: + output_len += 1 + batch_inputs, batch_outputs = [], [] + max_input_len = 0 + index_map = {} + for i in range(batch_size): + if outputs[i] is None: + index_map[len(batch_inputs)] = i + batch_inputs.append(tokens[i][:]) + batch_outputs.append(decoder_inputs[i]) + max_input_len = max(max_input_len, len(tokens[i])) + for i in range(len(batch_inputs)): + batch_inputs[i] += [pad_token] * (max_input_len - len(batch_inputs[i])) + predicts = model.predict([np.array(batch_inputs), np.array(batch_outputs)]) + for i in range(len(predicts)): + if top_k == 1: + last_token = predicts[i][-1].argmax(axis=-1) + else: + probs = [(prob, j) for j, prob in enumerate(predicts[i][-1])] + probs.sort(reverse=True) + probs = probs[:top_k] + indices, probs = list(map(lambda x: x[1], probs)), list(map(lambda x: x[0], probs)) + probs = np.array(probs) / temperature + probs = probs - np.max(probs) + probs = np.exp(probs) + probs = probs / np.sum(probs) + last_token = np.random.choice(indices, p=probs) + 
decoder_inputs[index_map[i]].append(last_token) + if last_token == end_token or (max_len is not None and output_len >= max_len) or\ + get_max_suffix_repeat_times(decoder_inputs, max_repeat * max_repeat_block) >= max_repeat: + outputs[index_map[i]] = decoder_inputs[index_map[i]] + if is_single: + outputs = outputs[0] + return outputs + + diff --git a/keras_layers/transformer_utils/__init__.py b/keras_layers/transformer_utils/__init__.py new file mode 100644 index 0000000..9753908 --- /dev/null +++ b/keras_layers/transformer_utils/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/22 9:15 +# @author :Mo +# @function : \ No newline at end of file diff --git a/keras_layers/transformer_utils/embedding.py b/keras_layers/transformer_utils/embedding.py new file mode 100644 index 0000000..3acdad9 --- /dev/null +++ b/keras_layers/transformer_utils/embedding.py @@ -0,0 +1,91 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/22 10:00 +# @author :Mo +# @function : + +from keras.layers import Embedding, Layer +import keras.backend as K +import keras + + +class EmbeddingRet(Embedding): + """Embedding layer with weights returned.""" + + def compute_output_shape(self, input_shape): + return [super(EmbeddingRet, self).compute_output_shape(input_shape), + (self.input_dim, self.output_dim), + ] + + def compute_mask(self, inputs, mask=None): + return [super(EmbeddingRet, self).compute_mask(inputs, mask), + None, + ] + + def call(self, inputs): + return [super(EmbeddingRet, self).call(inputs), + self.embeddings, + ] + + +class EmbeddingSim(Layer): + """Calculate similarity between features and token embeddings with bias term.""" + + def __init__(self, + use_bias=True, + initializer='zeros', + regularizer=None, + constraint=None, + **kwargs): + """Initialize the layer. + + :param output_dim: Same as embedding output dimension. + :param use_bias: Whether to use bias term. + :param initializer: Initializer for bias. 
+ :param regularizer: Regularizer for bias. + :param constraint: Constraint for bias. + :param kwargs: Arguments for parent class. + """ + super(EmbeddingSim, self).__init__(**kwargs) + self.supports_masking = True + self.use_bias = use_bias + self.initializer = keras.initializers.get(initializer) + self.regularizer = keras.regularizers.get(regularizer) + self.constraint = keras.constraints.get(constraint) + self.bias = None + + def get_config(self): + config = {'use_bias': self.use_bias, + 'initializer': keras.initializers.serialize(self.initializer), + 'regularizer': keras.regularizers.serialize(self.regularizer), + 'constraint': keras.constraints.serialize(self.constraint), + } + base_config = super(EmbeddingSim, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + if self.use_bias: + embed_shape = input_shape[1] + token_num = embed_shape[0] + self.bias = self.add_weight(shape=(token_num,), + initializer=self.initializer, + regularizer=self.regularizer, + constraint=self.constraint, + name='bias', + ) + super(EmbeddingSim, self).build(input_shape) + + def compute_output_shape(self, input_shape): + feature_shape, embed_shape = input_shape + token_num = embed_shape[0] + return feature_shape[:-1] + (token_num,) + + def compute_mask(self, inputs, mask=None): + return mask[0] + + def call(self, inputs, mask=None, **kwargs): + inputs, embeddings = inputs + outputs = K.dot(inputs, K.transpose(embeddings)) + if self.use_bias: + outputs = K.bias_add(outputs, self.bias) + return keras.activations.softmax(outputs) diff --git a/keras_layers/transformer_utils/feedforward.py b/keras_layers/transformer_utils/feedforward.py new file mode 100644 index 0000000..fe43f2f --- /dev/null +++ b/keras_layers/transformer_utils/feedforward.py @@ -0,0 +1,122 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/22 9:16 +# @author :Mo +# @function : + + + +from keras.layers import Layer +import keras.backend as K 
+import keras + + +class FeedForward(Layer): + """Position-wise feed-forward layer. + + See: https://arxiv.org/pdf/1706.03762.pdf + """ + + def __init__(self, + units, + activation='relu', + use_bias=True, + kernel_initializer='glorot_normal', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs): + """Initialize the layer. + + :param units: Dimension of hidden units. + :param activation: Activation for the first linear transformation. + :param use_bias: Whether to use the bias term. + :param kernel_initializer: Initializer for kernels. + :param bias_initializer: Initializer for kernels. + :param kernel_regularizer: Regularizer for kernels. + :param bias_regularizer: Regularizer for kernels. + :param kernel_constraint: Constraint for kernels. + :param bias_constraint: Constraint for kernels. + :param kwargs: + """ + self.supports_masking = True + self.units = units + self.activation = keras.activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = keras.initializers.get(kernel_initializer) + self.bias_initializer = keras.initializers.get(bias_initializer) + self.kernel_regularizer = keras.regularizers.get(kernel_regularizer) + self.bias_regularizer = keras.regularizers.get(bias_regularizer) + self.kernel_constraint = keras.constraints.get(kernel_constraint) + self.bias_constraint = keras.constraints.get(bias_constraint) + self.W1, self.b1 = None, None + self.W2, self.b2 = None, None + super(FeedForward, self).__init__(**kwargs) + + def get_config(self): + config = { + 'units': self.units, + 'activation': keras.activations.serialize(self.activation), + 'use_bias': self.use_bias, + 'kernel_initializer': keras.initializers.serialize(self.kernel_initializer), + 'bias_initializer': keras.initializers.serialize(self.bias_initializer), + 'kernel_regularizer': keras.regularizers.serialize(self.kernel_regularizer), + 'bias_regularizer': 
keras.regularizers.serialize(self.bias_regularizer), + 'kernel_constraint': keras.constraints.serialize(self.kernel_constraint), + 'bias_constraint': keras.constraints.serialize(self.bias_constraint), + } + base_config = super(FeedForward, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_mask(self, inputs, input_mask=None): + return input_mask + + def build(self, input_shape): + feature_dim = input_shape[-1] + self.W1 = self.add_weight( + shape=(feature_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='{}_W1'.format(self.name), + ) + if self.use_bias: + self.b1 = self.add_weight( + shape=(self.units,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='{}_b1'.format(self.name), + ) + self.W2 = self.add_weight( + shape=(self.units, feature_dim), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='{}_W2'.format(self.name), + ) + if self.use_bias: + self.b2 = self.add_weight( + shape=(feature_dim,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='{}_b2'.format(self.name), + ) + super(FeedForward, self).build(input_shape) + + def call(self, x, mask=None): + h = K.dot(x, self.W1) + if self.use_bias: + h = K.bias_add(h, self.b1) + if self.activation is not None: + h = self.activation(h) + y = K.dot(h, self.W2) + if self.use_bias: + y = K.bias_add(y, self.b2) + return y diff --git a/keras_layers/transformer_utils/layer_normalization.py b/keras_layers/transformer_utils/layer_normalization.py new file mode 100644 index 0000000..e8594f1 --- /dev/null +++ b/keras_layers/transformer_utils/layer_normalization.py @@ -0,0 +1,107 @@ +# -*- coding: UTF-8 -*- +# 
!/usr/bin/python +# @time :2019/7/22 9:25 +# @author :Mo +# @function : + + +from keras.layers import Layer +import keras.backend as K +import keras + + +class LayerNormalization(Layer): + + def __init__(self, + center=True, + scale=True, + epsilon=None, + gamma_initializer='ones', + beta_initializer='zeros', + gamma_regularizer=None, + beta_regularizer=None, + gamma_constraint=None, + beta_constraint=None, + **kwargs): + """Layer normalization layer + + See: [Layer Normalization](https://arxiv.org/pdf/1607.06450.pdf) + + :param center: Add an offset parameter if it is True. + :param scale: Add a scale parameter if it is True. + :param epsilon: Epsilon for calculating variance. + :param gamma_initializer: Initializer for the gamma weight. + :param beta_initializer: Initializer for the beta weight. + :param gamma_regularizer: Optional regularizer for the gamma weight. + :param beta_regularizer: Optional regularizer for the beta weight. + :param gamma_constraint: Optional constraint for the gamma weight. + :param beta_constraint: Optional constraint for the beta weight. 
+ :param kwargs: + """ + super(LayerNormalization, self).__init__(**kwargs) + self.supports_masking = True + self.center = center + self.scale = scale + if epsilon is None: + epsilon = K.epsilon() * K.epsilon() + self.epsilon = epsilon + self.gamma_initializer = keras.initializers.get(gamma_initializer) + self.beta_initializer = keras.initializers.get(beta_initializer) + self.gamma_regularizer = keras.regularizers.get(gamma_regularizer) + self.beta_regularizer = keras.regularizers.get(beta_regularizer) + self.gamma_constraint = keras.constraints.get(gamma_constraint) + self.beta_constraint = keras.constraints.get(beta_constraint) + self.gamma, self.beta = None, None + + def get_config(self): + config = { + 'center': self.center, + 'scale': self.scale, + 'epsilon': self.epsilon, + 'gamma_initializer': keras.initializers.serialize(self.gamma_initializer), + 'beta_initializer': keras.initializers.serialize(self.beta_initializer), + 'gamma_regularizer': keras.regularizers.serialize(self.gamma_regularizer), + 'beta_regularizer': keras.regularizers.serialize(self.beta_regularizer), + 'gamma_constraint': keras.constraints.serialize(self.gamma_constraint), + 'beta_constraint': keras.constraints.serialize(self.beta_constraint), + } + base_config = super(LayerNormalization, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_mask(self, inputs, input_mask=None): + return input_mask + + def build(self, input_shape): + self.input_spec = keras.engine.InputSpec(shape=input_shape) + shape = input_shape[-1:] + if self.scale: + self.gamma = self.add_weight( + shape=shape, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + name='gamma', + ) + if self.center: + self.beta = self.add_weight( + shape=shape, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + 
constraint=self.beta_constraint, + name='beta', + ) + super(LayerNormalization, self).build(input_shape) + + def call(self, inputs, training=None): + mean = K.mean(inputs, axis=-1, keepdims=True) + variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True) + std = K.sqrt(variance + self.epsilon) + outputs = (inputs - mean) / std + if self.scale: + outputs *= self.gamma + if self.center: + outputs += self.beta + return outputs diff --git a/keras_layers/transformer_utils/multi_head_attention.py b/keras_layers/transformer_utils/multi_head_attention.py new file mode 100644 index 0000000..949fa4b --- /dev/null +++ b/keras_layers/transformer_utils/multi_head_attention.py @@ -0,0 +1,225 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/7/22 9:27 +# @author :Mo +# @function : + + +from keras_layers.transformer_utils.scale_dot_product_attention import ScaledDotProductAttention +from keras.layers import Layer +import keras.backend as K +import keras + + +class MultiHeadAttention(Layer): + """Multi-head attention layer. + + See: https://arxiv.org/pdf/1706.03762.pdf + """ + + def __init__(self, + head_num, + activation='relu', + use_bias=True, + kernel_initializer='glorot_normal', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + history_only=False, + **kwargs): + """Initialize the layer. + + :param head_num: Number of heads. + :param activation: Activations for linear mappings. + :param use_bias: Whether to use bias term. + :param kernel_initializer: Initializer for linear mappings. + :param bias_initializer: Initializer for linear mappings. + :param kernel_regularizer: Regularizer for linear mappings. + :param bias_regularizer: Regularizer for linear mappings. + :param kernel_constraint: Constraints for linear mappings. + :param bias_constraint: Constraints for linear mappings. + :param history_only: Whether to only use history in attention layer. 
+ """ + self.supports_masking = True + self.head_num = head_num + self.activation = keras.activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = keras.initializers.get(kernel_initializer) + self.bias_initializer = keras.initializers.get(bias_initializer) + self.kernel_regularizer = keras.regularizers.get(kernel_regularizer) + self.bias_regularizer = keras.regularizers.get(bias_regularizer) + self.kernel_constraint = keras.constraints.get(kernel_constraint) + self.bias_constraint = keras.constraints.get(bias_constraint) + self.history_only = history_only + + self.Wq, self.Wk, self.Wv, self.Wo = None, None, None, None + self.bq, self.bk, self.bv, self.bo = None, None, None, None + super(MultiHeadAttention, self).__init__(**kwargs) + + def get_config(self): + config = { + 'head_num': self.head_num, + 'activation': keras.activations.serialize(self.activation), + 'use_bias': self.use_bias, + 'kernel_initializer': keras.initializers.serialize(self.kernel_initializer), + 'bias_initializer': keras.initializers.serialize(self.bias_initializer), + 'kernel_regularizer': keras.regularizers.serialize(self.kernel_regularizer), + 'bias_regularizer': keras.regularizers.serialize(self.bias_regularizer), + 'kernel_constraint': keras.constraints.serialize(self.kernel_constraint), + 'bias_constraint': keras.constraints.serialize(self.bias_constraint), + 'history_only': self.history_only, + } + base_config = super(MultiHeadAttention, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + q, k, v = input_shape + return q[:-1] + (v[-1],) + return input_shape + + def compute_mask(self, inputs, input_mask=None): + if isinstance(input_mask, list): + return input_mask[0] + return input_mask + + def build(self, input_shape): + if isinstance(input_shape, list): + q, k, v = input_shape + else: + q = k = v = input_shape + feature_dim = v[-1] + if 
feature_dim % self.head_num != 0: + raise IndexError('Invalid head number %d with the given input dim %d' % (self.head_num, feature_dim)) + self.Wq = self.add_weight( + shape=(q[-1], feature_dim), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='%s_Wq' % self.name, + ) + if self.use_bias: + self.bq = self.add_weight( + shape=(feature_dim,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='%s_bq' % self.name, + ) + self.Wk = self.add_weight( + shape=(k[-1], feature_dim), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='%s_Wk' % self.name, + ) + if self.use_bias: + self.bk = self.add_weight( + shape=(feature_dim,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='%s_bk' % self.name, + ) + self.Wv = self.add_weight( + shape=(v[-1], feature_dim), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='%s_Wv' % self.name, + ) + if self.use_bias: + self.bv = self.add_weight( + shape=(feature_dim,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='%s_bv' % self.name, + ) + self.Wo = self.add_weight( + shape=(feature_dim, feature_dim), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + name='%s_Wo' % self.name, + ) + if self.use_bias: + self.bo = self.add_weight( + shape=(feature_dim,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + name='%s_bo' % self.name, + ) + super(MultiHeadAttention, self).build(input_shape) + + @staticmethod + def _reshape_to_batches(x, head_num): + input_shape = K.shape(x) + batch_size, seq_len, feature_dim = 
input_shape[0], input_shape[1], input_shape[2] + head_dim = feature_dim // head_num + x = K.reshape(x, (batch_size, seq_len, head_num, head_dim)) + x = K.permute_dimensions(x, [0, 2, 1, 3]) + return K.reshape(x, (batch_size * head_num, seq_len, head_dim)) + + @staticmethod + def _reshape_from_batches(x, head_num): + input_shape = K.shape(x) + batch_size, seq_len, feature_dim = input_shape[0], input_shape[1], input_shape[2] + x = K.reshape(x, (batch_size // head_num, head_num, seq_len, feature_dim)) + x = K.permute_dimensions(x, [0, 2, 1, 3]) + return K.reshape(x, (batch_size // head_num, seq_len, feature_dim * head_num)) + + @staticmethod + def _reshape_mask(mask, head_num): + if mask is None: + return mask + seq_len = K.shape(mask)[1] + mask = K.expand_dims(mask, axis=1) + mask = K.tile(mask, [1, head_num, 1]) + return K.reshape(mask, (-1, seq_len)) + + def call(self, inputs, mask=None): + if isinstance(inputs, list): + q, k, v = inputs + else: + q = k = v = inputs + if isinstance(mask, list): + q_mask, k_mask, v_mask = mask + else: + q_mask = k_mask = v_mask = mask + q = K.dot(q, self.Wq) + k = K.dot(k, self.Wk) + v = K.dot(v, self.Wv) + if self.use_bias: + q += self.bq + k += self.bk + v += self.bv + if self.activation is not None: + q = self.activation(q) + k = self.activation(k) + v = self.activation(v) + y = ScaledDotProductAttention( + history_only=self.history_only, + name='%s-Attention' % self.name, + )( + inputs=[ + self._reshape_to_batches(q, self.head_num), + self._reshape_to_batches(k, self.head_num), + self._reshape_to_batches(v, self.head_num), + ], + mask=[ + self._reshape_mask(q_mask, self.head_num), + self._reshape_mask(k_mask, self.head_num), + self._reshape_mask(v_mask, self.head_num), + ], + ) + y = self._reshape_from_batches(y, self.head_num) + y = K.dot(y, self.Wo) + if self.use_bias: + y += self.bo + if self.activation is not None: + y = self.activation(y) + return y diff --git a/keras_layers/transformer_utils/readme.md 
b/keras_layers/transformer_utils/readme.md new file mode 100644 index 0000000..20ff0a6 --- /dev/null +++ b/keras_layers/transformer_utils/readme.md @@ -0,0 +1,7 @@ +reference: + 1.code from github: + author:CyberZHG; + url: https://github.com/CyberZHG/keras-transformer. + 2.change: + some code of function and so on. + diff --git a/keras_layers/transformer_utils/scale_dot_product_attention.py b/keras_layers/transformer_utils/scale_dot_product_attention.py new file mode 100644 index 0000000..17f9aea --- /dev/null +++ b/keras_layers/transformer_utils/scale_dot_product_attention.py @@ -0,0 +1,82 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2021/7/22 9:26 +# @author :Mo +# @function : + + +from keras.layers import Layer +import keras.backend as K + + +class ScaledDotProductAttention(Layer): + """The attention layer that takes three inputs representing queries, keys and values. + + \text{Attention}(Q, K, V) = \text{softmax}(\frac{Q K^T}{\sqrt{d_k}}) V + + See: https://arxiv.org/pdf/1706.03762.pdf + """ + + def __init__(self, + return_attention=False, + history_only=False, + **kwargs): + """Initialize the layer. + + :param return_attention: Whether to return attention weights. + :param history_only: Whether to only use history data. + :param kwargs: Arguments for parent class. 
+ """ + self.supports_masking = True + self.return_attention = return_attention + self.history_only = history_only + super(ScaledDotProductAttention, self).__init__(**kwargs) + + def get_config(self): + config = { + 'return_attention': self.return_attention, + 'history_only': self.history_only, + } + base_config = super(ScaledDotProductAttention, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + if isinstance(input_shape, list): + query_shape, key_shape, value_shape = input_shape + else: + query_shape = key_shape = value_shape = input_shape + output_shape = query_shape[:-1] + value_shape[-1:] + if self.return_attention: + attention_shape = query_shape[:2] + (key_shape[1],) + return [output_shape, attention_shape] + return output_shape + + def compute_mask(self, inputs, mask=None): + if isinstance(mask, list): + mask = mask[0] + if self.return_attention: + return [mask, None] + return mask + + def call(self, inputs, mask=None, **kwargs): + if isinstance(inputs, list): + query, key, value = inputs + else: + query = key = value = inputs + if isinstance(mask, list): + mask = mask[1] + feature_dim = K.shape(query)[-1] + e = K.batch_dot(query, key, axes=2) / K.sqrt(K.cast(feature_dim, dtype=K.floatx())) + e = K.exp(e - K.max(e, axis=-1, keepdims=True)) + if self.history_only: + query_len, key_len = K.shape(query)[1], K.shape(key)[1] + indices = K.tile(K.expand_dims(K.arange(key_len), axis=0), [query_len, 1]) + upper = K.expand_dims(K.arange(key_len), axis=-1) + e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0) + if mask is not None: + e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx()) + a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon()) + v = K.batch_dot(a, value) + if self.return_attention: + return [v, a] + return v \ No newline at end of file diff --git a/keras_layers/transformer_utils/triangle_position_embedding.py 
b/keras_layers/transformer_utils/triangle_position_embedding.py new file mode 100644 index 0000000..22b1f44 --- /dev/null +++ b/keras_layers/transformer_utils/triangle_position_embedding.py @@ -0,0 +1,116 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2021/7/22 9:36 +# @author :Mo +# @function : + + +from keras.layers import Layer +import keras.backend as K + + +class TriglePositiomEmbedding(Layer): + """Position embedding use sine and cosine functions. + + See: https://arxiv.org/pdf/1706.03762 + + Expand mode: + # Input shape + 2D tensor with shape: `(batch_size, sequence_length)`. + + # Output shape + 3D tensor with shape: `(batch_size, sequence_length, output_dim)`. + + Add mode: + # Input shape + 3D tensor with shape: `(batch_size, sequence_length, feature_dim)`. + + # Output shape + 3D tensor with shape: `(batch_size, sequence_length, feature_dim)`. + + Concat mode: + # Input shape + 3D tensor with shape: `(batch_size, sequence_length, feature_dim)`. + + # Output shape + 3D tensor with shape: `(batch_size, sequence_length, feature_dim + output_dim)`. + """ + MODE_EXPAND = 'expand' + MODE_ADD = 'add' + MODE_CONCAT = 'concat' + + def __init__(self, + mode=MODE_ADD, + output_dim=None, + **kwargs): + """ + :param output_dim: The embedding dimension. 
+ :param kwargs: + """ + if mode in [self.MODE_EXPAND, self.MODE_CONCAT]: + if output_dim is None: + raise NotImplementedError('`output_dim` is required in `%s` mode' % mode) + if output_dim % 2 != 0: + raise NotImplementedError('It does not make sense to use an odd output dimension: %d' % output_dim) + self.mode = mode + self.output_dim = output_dim + self.supports_masking = True + super(TriglePositiomEmbedding, self).__init__(**kwargs) + + def get_config(self): + config = { + 'mode': self.mode, + 'output_dim': self.output_dim, + } + base_config = super(TriglePositiomEmbedding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_mask(self, inputs, mask=None): + return mask + + def compute_output_shape(self, input_shape): + if self.mode == self.MODE_EXPAND: + return input_shape + (self.output_dim,) + if self.mode == self.MODE_CONCAT: + return input_shape[:-1] + (input_shape[-1] + self.output_dim,) + return input_shape + + def call(self, inputs, mask=None): + input_shape = K.shape(inputs) + if self.mode == self.MODE_ADD: + batch_size, seq_len, output_dim = input_shape[0], input_shape[1], input_shape[2] + pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1]) + elif self.mode == self.MODE_CONCAT: + batch_size, seq_len, output_dim = input_shape[0], input_shape[1], self.output_dim + pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1]) + else: + output_dim = self.output_dim + pos_input = inputs + if K.dtype(pos_input) != K.floatx(): + pos_input = K.cast(pos_input, K.floatx()) + evens = K.arange(output_dim // 2) * 2 + odds = K.arange(output_dim // 2) * 2 + 1 + even_embd = K.sin( + K.dot( + K.expand_dims(pos_input, -1), + K.expand_dims(1.0 / K.pow( + 10000.0, + K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx()) + ), 0) + ) + ) + odd_embd = K.cos( + K.dot( + K.expand_dims(pos_input, -1), + K.expand_dims(1.0 / K.pow( + 10000.0, K.cast((odds - 1), K.floatx()) / 
K.cast(output_dim, K.floatx()) + ), 0) + ) + ) + embd = K.stack([even_embd, odd_embd], axis=-1) + output = K.reshape(embd, [-1, K.shape(inputs)[1], output_dim]) + if self.mode == self.MODE_CONCAT: + output = K.concatenate([inputs, output], axis=-1) + if self.mode == self.MODE_ADD: + output += inputs + return output diff --git a/textCNN/__init__.py b/mLSTM/__init__.py similarity index 100% rename from textCNN/__init__.py rename to mLSTM/__init__.py diff --git a/mLSTM/graph.py b/mLSTM/graph.py new file mode 100644 index 0000000..082ee3b --- /dev/null +++ b/mLSTM/graph.py @@ -0,0 +1,221 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/8 11:45 +# @author :Mo +# @function :RCNN model +# paper: Recurrent Convolutional Neural Networks for TextClassification(http://www.nlpr.ia.ac.cn/cip/~liukang/liukangPageFile/Recurrent%20Convolutional%20Neural%20Networks%20for%20Text%20Classification.pdf) + +from __future__ import print_function, division + +from keras.layers import Conv1D, Conv2D, MaxPooling2D, Dense, Lambda +from keras.layers import Dropout, Reshape, Concatenate +from keras.layers import LSTM +from keras.layers import Flatten +from keras.models import Model +from keras import backend as K +from keras import regularizers +from base.graph import graph +from keras_layers.attention_dot import Attention + +class LSTMGraph(graph): + def __init__(self, hyper_parameters): + """ + 初始化 + :param hyper_parameters: json,超参 + """ + self.train_mode = hyper_parameters['train_mode'] + self.rnn_type = hyper_parameters['model'].get('rnn_type', 'LSTM') + self.rnn_units = hyper_parameters['model'].get('rnn_units', 256) # large, small is 300 + super().__init__(hyper_parameters) + + def create_model_bilstm_cnn(self, hyper_parameters): + """ + 构建神经网络,行卷积加池化 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + # 反向 + x_backwords = 
LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = True)(embedding_output) + x_backwords_reverse = Lambda(lambda x: K.reverse(x, axes=1))(x_backwords) + # 前向 + x_fordwords = LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = False)(embedding_output) + + if "attention" in self.train_mode: + attention_out = Attention()(embedding_output) + # 拼接 + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, x_backwords_reverse, attention_out]) + else: + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, x_backwords_reverse]) + + if "cnn" in self.train_mode: + ####使用多个卷积核################################################## + x_feb = Dropout(self.dropout)(x_feb) + # Concatenate后的embedding_size + dim_2 = K.int_shape(x_feb)[2] + x_feb_reshape = Reshape((self.len_max, dim_2, 1))(x_feb) + # 提取n-gram特征和最大池化, 一般不用平均池化 + conv_pools = [] + for filter in self.filters: + conv = Conv2D(filters = self.filters_num, + kernel_size = (filter, dim_2), + padding = 'valid', + kernel_initializer = 'normal', + activation = 'relu', + )(x_feb_reshape) + pooled = MaxPooling2D(pool_size = (self.len_max - filter + 1, 1), + strides = (1, 1), + padding = 'valid', + )(conv) + conv_pools.append(pooled) + # 拼接 + x = Concatenate()(conv_pools) + x = Dropout(self.dropout)(x) + else: + x = x_feb + + # x = Attention()(x) + x = Flatten()(x) + x = Dense(units=128, activation="tanh")(x) + x = Dropout(self.dropout)(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + def create_model_silstm_cnn(self, hyper_parameters): + """ + 构建神经网络,行卷积加池化 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + 
super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + # 前向 + x_fordwords = LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = False)(embedding_output) + + if "attention" in self.train_mode: + attention_out = Attention()(embedding_output) + # 拼接 + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, attention_out]) + else: + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output]) + + if "cnn" in self.train_mode: + ####使用多个卷积核################################################## + x_feb = Dropout(self.dropout)(x_feb) + # Concatenate后的embedding_size + dim_2 = K.int_shape(x_feb)[2] + x_feb_reshape = Reshape((self.len_max, dim_2, 1))(x_feb) + # 提取n-gram特征和最大池化, 一般不用平均池化 + conv_pools = [] + for filter in self.filters: + conv = Conv2D(filters = self.filters_num, + kernel_size = (filter, dim_2), + padding = 'valid', + kernel_initializer = 'normal', + activation = 'relu', + )(x_feb_reshape) + pooled = MaxPooling2D(pool_size = (self.len_max - filter + 1, 1), + strides = (1, 1), + padding = 'valid', + )(conv) + conv_pools.append(pooled) + # 拼接 + x = Concatenate()(conv_pools) + x = Dropout(self.dropout)(x) + else: + x = x_feb + + x = Flatten()(x) + x = Dense(units=128, activation="tanh")(x) + x = Dropout(self.dropout)(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + def create_model_slstm(self, hyper_parameters): + """ + 构建神经网络,行卷积加池化 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + + # 前向 + x = LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + 
go_backwards=False)(embedding_output) + + x = Flatten()(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + def create_model_bilstm(self, hyper_parameters): + """ + 构建神经网络,行卷积加池化 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + # 反向 + x_backwords = LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards=True)(embedding_output) + x_backwords_reverse = Lambda(lambda x: K.reverse(x, axes=1))(x_backwords) + # 前向 + x_fordwords = LSTM(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards=False)(embedding_output) + + x = Concatenate(axis=2)([x_fordwords, x_backwords_reverse]) + x = Flatten()(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + def create_model(self, hyper_parameters): + if "Bi" == self.train_mode: + self.create_model_bilstm(hyper_parameters) + elif "Bi_" in self.train_mode: + self.create_model_bilstm_cnn(hyper_parameters) + elif "Si_" in self.train_mode: + self.create_model_silstm_cnn(hyper_parameters) + else: # Si or non + self.create_model_slstm(hyper_parameters) + +# 卷积的2种方式 +# # 1 github: https://github.com/ShawnyXiao/TextClassification-Keras/tree/master/model/RCNN/rcnn.py +# x = Conv1D(64, kernel_size=1, activation='tanh')(x) +# x = GlobalMaxPooling1D()(x) +# +# +# # 2 github : https://github.com/airalcorn2/Recurrent-Convolutional-Neural-Network-Text-Classifier/blob/master/recurrent_convolutional_keras.py +# semantic = Conv1D(hidden_dim_2, kernel_size=1, 
activation="tanh")() # See equation (4). +# # Keras provides its own max-pooling layers, but they cannot handle variable length input +# # (as far as I can tell). As a result, I define my own max-pooling layer here. +# pool_rnn = Lambda(lambda x: backend.max(x, axis=1), output_shape=(hidden_dim_2,))(semantic) # See equation (5). + + + diff --git a/mLSTM/train.py b/mLSTM/train.py new file mode 100644 index 0000000..709bb20 --- /dev/null +++ b/mLSTM/train.py @@ -0,0 +1,179 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/8 14:37 +# @author :Mo +# @function :train of RCNNGraph_kim with baidu-qa-2019 in question title + + +# 适配linux +import pathlib +import sys +import os +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) + +# 地址 +from conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters, path_out +# 训练验证数据地址 +from conf.path_config import path_train, path_valid, path_tests +# 数据预处理, 删除文件目录下文件 +from data_preprocess.text_preprocess import PreprocessTextMulti, delete_file, load_json, save_json, transform_multilabel_to_multihot, txt_write +from data_preprocess.utils import mkdir, draw_accuracy_figure, copy_file +# 模型图 +from mLSTM.graph import LSTMGraph as Graph +# 模型评估 +from sklearn.metrics import classification_report, hamming_loss +# 计算时间 +import time + + +def train(hyper_parameters=None, rate=1.0): + """ + 训练函数 + :param hyper_parameters: json, 超参数 + :param rate: 比率, 抽出rate比率语料取训练 + :return: None + """ + if not hyper_parameters: + hyper_parameters = { + 'train_name':'LSTM', + 'train_mode':'Bi_cnn', # non, Si, Si_cnn, Si_attention, Si_attention_cnn, Bi, Bi_cnn, Bi_attention, Bi_attention_cnn + 'train_time': None, + 'path_train_out': 'None', + 'len_max': 60, # 句子最大长度, 固定 推荐20-50 + 'embed_size': 300, # 字/词向量维度 + 'vocab_size': 20000, # 这里随便填的,会根据代码里修改 + 'trainable': True, # embedding是静态的还是动态的, 即控制可不可以微调 + 'level_type': 'char', # 级别, 最小单元, 字/词, 填 'char' or 'word' + 
'embedding_type': 'random', # 级别, 嵌入类型, 还可以填'xlnet'、'random'、 'bert'、 'albert' or 'word2vec" + 'gpu_memory_fraction': 0.86, #gpu使用率 + 'model': {'label': 51, # 类别数 + 'batch_size': 256, # 批处理尺寸, 感觉原则上越大越好,尤其是样本不均衡的时候, batch_size设置影响比较大 + 'filters': [2, 3, 4], # 卷积核尺寸 + 'filters_num': 300, # 卷积个数 text-cnn:300-600 + 'channel_size': 1, # CNN通道数 + 'dropout': 0.2, # 随机失活, 概率 + 'decay_step': 100, # 学习率衰减step, 每N个step衰减一次 + 'decay_rate': 0.9, # 学习率衰减系数, 乘法 + 'epochs': 1, # 训练最大轮次 + 'patience': 3, # 早停,2-3就好 + 'lr': 5e-4, # 学习率, 对训练会有比较大的影响, 如果准确率一直上不去,可以考虑调这个参数 + 'l2': 1e-9, # l2正则化 + 'activate_classify': 'softmax', # 最后一个layer, 即分类激活函数 + #'loss': 'categorical_crossentropy', # 损失函数 + 'loss': 'binary_crossentropy', # 损失函数, 可能有问题, 可以自己定义 categorical_crossentropy + 'metrics': 'accuracy', # 保存更好模型的评价标准 + 'is_training': True, # 训练后者是测试模型 + 'path_model_dir': path_model_dir, # 模型目录 + 'model_path': path_model, + # 模型地址, loss降低则保存的依据, save_best_only=True, save_weights_only=True + 'path_hyper_parameters': path_hyper_parameters, # 模型(包括embedding),超参数地址, + 'path_fineture': path_fineture, # 保存embedding trainable地址, 例如字向量、词向量、bert向量等 + 'rnn_type': 'LSTM', # rnn_type类型, 还可以填"GRU" + 'rnn_units': 256, # RNN隐藏层 + }, + 'embedding': {'layer_indexes': [12], # bert取的层数, + # 'corpus_path': '', # embedding预训练数据地址,不配则会默认取conf里边默认的地址, keras-bert可以加载谷歌版bert,百度版ernie(需转换,https://github.com/ArthurRizar/tensorflow_ernie),哈工大版bert-wwm(tf框架,https://github.com/ymcui/Chinese-BERT-wwm) + }, + 'data':{'train_data': path_train, # 训练数据 + 'val_data': path_valid, # 验证数据 + 'test_data': path_tests, # 测试数据 + }, + } + + output_path = path_out + hyper_parameters['train_name'] + '_' + hyper_parameters['train_mode'] + mkdir(output_path) + hyper_parameters['path_train_out'] = output_path + + # 删除先前存在的模型\embedding微调模型等 + delete_file(path_model_dir) + time_start = time.time() + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + ra_ed = graph.word_embedding + # 数据预处理 + pt = 
PreprocessTextMulti(path_model_dir) + x_train, y_train = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['train_data'], + ra_ed, rate=rate, shuffle=True) + x_val, y_val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['val_data'], + ra_ed, rate=rate, shuffle=True) + print("data propress ok!") + print(len(y_train)) + # 训练 + H = graph.fit(x_train, y_train, x_val, y_val) + train_time = time.time() - time_start + hyper_parameters['train_time'] = train_time + print("耗时:" + str(train_time)) + + # 绘图 + draw_accuracy_figure(H, output_path) + copy_file(path_hyper_parameters, output_path + '/hyper_parameters.json') + +def pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0): + preout_parameters = { + 'predict_time': None, + 'predict_acc': None, + 'hamming_loss': None, + 'predict_report': 'None', + } + # 测试集的准确率 + hyper_parameters = load_json(path_hyper_parameter) + path_json = hyper_parameters['path_train_out'] + '/predict_resault.json' + time_start = time.time() + + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + graph.load_model() + print("graph load ok!") + ra_ed = graph.word_embedding + + # 数据预处理 + pt = PreprocessTextMulti(path_model_dir) + x, y = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], path_test, + ra_ed, rate, shuffle=True) + y_pred = [] + index_y = [] + pred = graph.predict(x) + + print(pred) + for i in range(len(pred)): + pre = pt.prereocess_idx(pred[i]) + label_pred = pre[0][0][0] + label_pred = pt.l2i_i2l['l2i'][label_pred] + label_multi_idex = transform_multilabel_to_multihot(label_pred, label=51) + y_pred.append(label_multi_idex) + index_y.append(y[i].tolist()) + print(pre) + print(label_multi_idex) + print(y[i].tolist()) + print('=========================') + + print("data pred ok!") + # 预测结果转为int类型 + # index_y = [pt.l2i_i2l['l2i'][i] for i in y] + # index_pred = [pt.l2i_i2l['l2i'][i] for i 
in y_pred] + target_names = [pt.l2i_i2l['i2l'][str(i)] for i in range(hyper_parameters['model'].get('label', '51'))] + print(target_names) + # 评估 + report_predict = classification_report(index_y, y_pred, digits=9, target_names=target_names) + preout_parameters['predict_report'] = report_predict + print(report_predict) + txt_write(list_line=report_predict, file_path=hyper_parameters['path_train_out'] + '/report.txt') + + h_loss = hamming_loss(index_y, y_pred) + preout_parameters['hamming_loss'] = h_loss + print("Hamming Loss = {:.6f}".format(h_loss)) + + predict_time = time.time() - time_start + preout_parameters['predict_time'] = predict_time + print("耗时:" + str(predict_time)) + + save_json(jsons=preout_parameters, json_path=path_json) + +if __name__ == "__main__": + train(rate=1) + pred_tet(path_test=path_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少 \ No newline at end of file diff --git a/mTextCNN/__init__.py b/mTextCNN/__init__.py new file mode 100644 index 0000000..4d53acf --- /dev/null +++ b/mTextCNN/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/8/14 21:23 +# @author :Mo +# @function : \ No newline at end of file diff --git a/textCNN/graph.py b/mTextCNN/graph.py similarity index 89% rename from textCNN/graph.py rename to mTextCNN/graph.py index dec7a91..ffdca1c 100644 --- a/textCNN/graph.py +++ b/mTextCNN/graph.py @@ -8,8 +8,9 @@ from keras.layers import Reshape, Concatenate, Conv2D, MaxPool2D from keras.layers import Dense, Dropout, Flatten from keras.models import Model - -from keras_textclassification.base.graph import graph +from keras import backend as K +from keras_layers.attention_dot import Attention +from base.graph import graph class TextCNNGraph(graph): @@ -18,6 +19,8 @@ def __init__(self, hyper_parameters): 初始化 :param hyper_parameters: json,超参 """ + self.train_mode = hyper_parameters['train_mode'] + self.rnn_units = hyper_parameters['model'].get('rnn_units', 256) # large, small is 300 
super().__init__(hyper_parameters) def create_model(self, hyper_parameters): @@ -28,6 +31,17 @@ def create_model(self, hyper_parameters): """ super().create_model(hyper_parameters) embedding = self.word_embedding.output + + # attention start + if self.train_mode == 'attention': + atten = Attention()(embedding) + atten_reshiape = Reshape((self.len_max, self.embed_size, 1))(atten) + atten = Conv2D(filters=self.filters_num, kernel_size=(3, self.embed_size), padding='valid', + kernel_initializer='normal', activation='tanh')(atten_reshiape) + atten = MaxPool2D(pool_size=(self.len_max - 2, 1), strides=(1, 1), padding='valid')(atten) + # attention end + + self.embed_size = K.int_shape(embedding)[2] embedding_reshape = Reshape((self.len_max, self.embed_size, 1))(embedding) # 提取n-gram特征和最大池化, 一般不用平均池化 conv_pools = [] @@ -42,7 +56,11 @@ def create_model(self, hyper_parameters): strides = (1, 1), padding = 'valid', )(conv) + conv_pools.append(pooled) + + if self.train_mode == 'attention': + conv_pools.append(atten) # add attention # 拼接 x = Concatenate(axis=-1)(conv_pools) x = Dropout(self.dropout)(x) @@ -160,7 +178,6 @@ def create_compile(self): 构建优化器、损失函数和评价函数 :return: """ - from keras_textclassification.keras_layers.keras_radam import RAdam from keras.optimizers import Adam # self.model.compile(optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0), # loss=[self.focal_loss(alpha=.25, gamma=2)], diff --git a/mTextCNN/train.py b/mTextCNN/train.py new file mode 100644 index 0000000..40935a7 --- /dev/null +++ b/mTextCNN/train.py @@ -0,0 +1,183 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/8/14 16:14 +# @author :Mo +# @function : + + +# 适配linux +import pathlib +import sys +import os + +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) +# 地址 +from conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters, path_embedding, path_multi_label +# 训练验证数据地址 +from 
conf.path_config import path_train, path_valid, path_tests, path_out +# 数据转换 excel -> csv +from data_preprocess.data_excel2csv import preprocess_excel_data as pre_pro +# 数据预处理, 删除文件目录下文件 +from data_preprocess.text_preprocess import PreprocessTextMulti, delete_file, load_json, save_json, transform_multilabel_to_multihot, txt_write +from data_preprocess.utils import mkdir, draw_accuracy_figure +# 模型图 +from mTextCNN.graph import TextCNNGraph as Graph +# 模型评估 +from sklearn.metrics import classification_report, hamming_loss +# 计算时间 +import time + +def train(hyper_parameters=None, rate=1.0): + if not hyper_parameters: + hyper_parameters = { + 'train_name':'textCNN', + 'train_mode':'attention', # non, attention + 'train_time': None, + 'path_train_out': 'None', + 'len_max': 60, # 句子最大长度, 固定推荐20-50, bert越长会越慢, 占用空间也会变大, 本地win10-4G设为20就好, 过大小心OOM + 'embed_size': 300, # 字/词向量维度, bert取768, word取300, char可以更小些 + 'vocab_size': 20000, # 这里随便填的,会根据代码里修改 + 'trainable': True, # embedding是静态的还是动态的, 即控制可不可以微调 + 'level_type': 'word', # 级别, 最小单元, 字/词, 填 'char' or 'word', 注意:word2vec模式下训练语料要首先切好 + 'embedding_type': 'word2vec', # 级别, 嵌入类型, 还可以填'xlnet'、'random'、 'bert'、 'albert' or 'word2vec" + 'gpu_memory_fraction': 0.86, #gpu使用率 + 'model': {'label': 51, # 类别数 + 'batch_size': 32, # 批处理尺寸, 感觉原则上越大越好,尤其是样本不均衡的时候, batch_size设置影响比较大 + 'dropout': 0.2, # 随机失活, 概率 + 'decay_step': 100, # 学习率衰减step, 每N个step衰减一次 + 'decay_rate': 0.9, # 学习率衰减系数, 乘法 + 'epochs': 1, # 训练最大轮次 + 'patience': 5, # 早停,2-3就好 + 'lr': 1e-4, # 学习率, bert取5e-5, 其他取1e-3, 对训练会有比较大的影响, 如果准确率一直上不去,可以考虑调这个参数 + 'l2': 1e-9, # l2正则化 + 'activate_classify': 'softmax', # 'sigmoid', # 最后一个layer, 即分类激活函数 + 'loss': 'binary_crossentropy', # 损失函数, 可能有问题, 可以自己定义 categorical_crossentropy, binary_crossentropy + #'metrics': 'top_k_categorical_accuracy', # 1070个类, 太多了先用topk, 这里数据k设置为最大:33 + 'metrics': 'accuracy', # 保存更好模型的评价标准, accuracy, categorical_accuracy + 'is_training': True, # 训练后者是测试模型 + 'model_path': path_model, + # 模型地址, loss降低则保存的依据, 
save_best_only=True, save_weights_only=True + 'path_hyper_parameters': path_hyper_parameters, # 模型(包括embedding),超参数地址, + 'path_fineture': path_fineture, # 保存embedding trainable地址, 例如字向量、词向量、bert向量等 + 'rnn_units': 256, # RNN隐藏层 + }, + 'embedding': {'layer_indexes': [13], # bert取的层数 + # 'corpus_path': '', # embedding预训练数据地址,不配则会默认取conf里边默认的地址, keras-bert可以加载谷歌版bert,百度版ernie(需转换,https://github.com/ArthurRizar/tensorflow_ernie),哈工大版bert-wwm(tf框架,https://github.com/ymcui/Chinese-BERT-wwm) + }, + 'data':{'train_data': path_train, # 训练数据 + 'val_data': path_valid, # 验证数据 + 'test_data': path_tests, # 测试数据 + }, + } + + output_path = path_out + hyper_parameters['train_name'] + '_' + hyper_parameters['train_mode'] + mkdir(output_path) + hyper_parameters['path_train_out'] = output_path + + # 删除先前存在的模型和embedding微调模型等 + delete_file(path_model_dir) + time_start = time.time() + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + ra_ed = graph.word_embedding + # 数据预处理 + pt = PreprocessTextMulti(path_model_dir) + x_train, y_train = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['train_data'], + ra_ed, rate=rate, shuffle=True) + print('train data propress ok!') + x_val, y_val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['val_data'], + ra_ed, rate=rate, shuffle=True) + print("data propress ok!") + print(len(y_train)) + # 训练 + H = graph.fit(x_train, y_train, x_val, y_val) + + train_time = time.time()-time_start + hyper_parameters['train_time'] = train_time + print("耗时:" + str(train_time)) + + # 绘图 + draw_accuracy_figure(H, output_path) + +def pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0): + preout_parameters = { + 'predict_time': None, + 'predict_acc': None, + 'hamming_loss': None, + 'predict_report': 'None', + } + # 测试集的准确率 + hyper_parameters = load_json(path_hyper_parameter) + path_json = hyper_parameters['path_train_out'] + 
'/predict_resault.json' + time_start = time.time() + + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + graph.load_model() + print("graph load ok!") + ra_ed = graph.word_embedding + + # 数据预处理 + pt = PreprocessTextMulti(path_model_dir) + x, y = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], path_test, + ra_ed, rate, shuffle=True) + y_pred = [] + index_y = [] + pred = graph.predict(x) + + print(pred) + for i in range(len(pred)): + pre = pt.prereocess_idx(pred[i]) + label_pred = pre[0][0][0] + label_pred = pt.l2i_i2l['l2i'][label_pred] + label_multi_idex = transform_multilabel_to_multihot(label_pred, label=51) + y_pred.append(label_multi_idex) + index_y.append(y[i].tolist()) + print(pre) + print(label_multi_idex) + print(y[i].tolist()) + print('=========================') + + print("data pred ok!") + # 预测结果转为int类型 + #index_y = [pt.l2i_i2l['l2i'][i] for i in y] + #index_pred = [pt.l2i_i2l['l2i'][i] for i in y_pred] + target_names = [pt.l2i_i2l['i2l'][str(i)] for i in range(hyper_parameters['model'].get('label', '51'))] + print(target_names) + # 评估 + report_predict = classification_report(index_y, y_pred, digits=9, target_names=target_names) + preout_parameters['predict_report'] = report_predict + print(report_predict) + txt_write(list_line=report_predict, file_path=hyper_parameters['path_train_out'] + '/report.txt') + + h_loss = hamming_loss(index_y, y_pred) + preout_parameters['hamming_loss'] = h_loss + print("Hamming Loss = {:.6f}".format(h_loss)) + + predict_time = time.time() - time_start + preout_parameters['predict_time'] = predict_time + print("耗时:" + str(predict_time)) + + save_json(jsons=preout_parameters, json_path=path_json) + +def cread_out_dir(): + mkdir(path_out) + mkdir(path_embedding) + mkdir(path_multi_label) + mkdir(path_model_dir) + +def pro_processdata(): + pre = pre_pro() # 实例化 + pre.excel2csv() # 数据预处理, excel文件转为csv, 拆分训练集和验证集 + pre.gen_vec() # 根据语料库,生成词向量 + +if __name__=="__main__": + #cread_out_dir() 
+ #pro_processdata() #预处理数据,只需执行一次 + train(rate=1) + pred_tet(path_test=path_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少 diff --git a/mTextRCNN/__init__.py b/mTextRCNN/__init__.py new file mode 100644 index 0000000..190b2f3 --- /dev/null +++ b/mTextRCNN/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/12 15:41 +# @author :Mo +# @function : \ No newline at end of file diff --git a/mTextRCNN/graph.py b/mTextRCNN/graph.py new file mode 100644 index 0000000..34c539d --- /dev/null +++ b/mTextRCNN/graph.py @@ -0,0 +1,172 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/8 11:45 +# @author :Mo +# @function :RCNN model +# paper: Recurrent Convolutional Neural Networks for TextClassification(http://www.nlpr.ia.ac.cn/cip/~liukang/liukangPageFile/Recurrent%20Convolutional%20Neural%20Networks%20for%20Text%20Classification.pdf) + + +from __future__ import print_function, division + +from keras.layers import Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Dense, Lambda +from keras.layers import Dropout, Reshape, Concatenate +from keras.layers import LSTM, GRU +from keras.layers import Flatten +from keras.models import Model +from keras import backend as K +from keras import regularizers + +from base.graph import graph +from keras_layers.attention_dot import Attention + + + +class RCNNGraph(graph): + def __init__(self, hyper_parameters): + """ + 初始化 + :param hyper_parameters: json,超参 + """ + self.train_mode = hyper_parameters['train_mode'] + self.rnn_type = hyper_parameters['model'].get('rnn_type', 'LSTM') + self.rnn_units = hyper_parameters['model'].get('rnn_units', 256) # large, small is 300 + super().__init__(hyper_parameters) + + def create_model(self, hyper_parameters): + """ + 构建神经网络,行卷积加池化 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + # rnn layers + if self.rnn_units=="LSTM": + layer_cell = 
LSTM + else: + layer_cell = GRU + # 反向 + x_backwords = layer_cell(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = True)(embedding_output) + x_backwords_reverse = Lambda(lambda x: K.reverse(x, axes=1))(x_backwords) + # 前向 + x_fordwords = layer_cell(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = False)(embedding_output) + + + if self.train_mode == "attention": + attention_out = Attention()(embedding_output) # add attention + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, x_backwords_reverse, attention_out]) + else: + # 拼接 + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, x_backwords_reverse]) + + ####使用多个卷积核################################################## + x_feb = Dropout(self.dropout)(x_feb) + # Concatenate后的embedding_size + dim_2 = K.int_shape(x_feb)[2] + x_feb_reshape = Reshape((self.len_max, dim_2, 1))(x_feb) + # 提取n-gram特征和最大池化, 一般不用平均池化 + conv_pools = [] + for filter in self.filters: + conv = Conv2D(filters = self.filters_num, + kernel_size = (filter, dim_2), + padding = 'valid', + kernel_initializer = 'normal', + activation = 'relu', + )(x_feb_reshape) + pooled = MaxPooling2D(pool_size = (self.len_max - filter + 1, 1), + strides = (1, 1), + padding = 'valid', + )(conv) + conv_pools.append(pooled) + # 拼接 + x = Concatenate()(conv_pools) + x = Dropout(self.dropout)(x) + x = Flatten()(x) + ######################################################################### + x = Dense(units=128, activation="tanh")(x) + x = Dropout(self.dropout)(x) + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + def create_model_cls(self, hyper_parameters): + """ + 构建神经网络, col, 论文中maxpooling使用的是列池化, 
不过实验效果似乎不佳,而且训练速度超级慢 + :param hyper_parameters:json, hyper parameters of network + :return: tensor, moedl + """ + super().create_model(hyper_parameters) + embedding_output = self.word_embedding.output + # rnn layers + if self.rnn_units=="LSTM": + layer_cell = LSTM + else: + layer_cell = GRU + # 反向 + x_backwords = layer_cell(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = True)(embedding_output) + x_backwords_reverse = Lambda(lambda x: K.reverse(x, axes=1))(x_backwords) + # 前向 + x_fordwords = layer_cell(units=self.rnn_units, + return_sequences=True, + kernel_regularizer=regularizers.l2(0.32 * 0.1), + recurrent_regularizer=regularizers.l2(0.32), + go_backwards = False)(embedding_output) + # 拼接 + x_feb = Concatenate(axis=2)([x_fordwords, embedding_output, x_backwords_reverse]) + + ####列池化################################################## + x_feb = Dropout(self.dropout)(x_feb) + dim_2 = K.int_shape(x_feb)[2] + x_feb_reshape = Reshape((dim_2, self.len_max))(x_feb) + + conv_pools = [] + for filter in self.filters: + conv = Conv1D(filters=self.filters_num, # filter=300 + kernel_size=filter, + padding='valid', + kernel_initializer='normal', + activation='relu', + )(x_feb_reshape) + pooled = MaxPooling1D(padding='valid', + pool_size=32, + )(conv) + conv_pools.append(pooled) + x = Concatenate(axis=1)(conv_pools) + # x = MaxPooling1D(padding = 'VALID',)(x_feb_reshape) + x = Flatten()(x) + x = Dropout(self.dropout)(x) + + ######################################################################### + + output = Dense(units=self.label, activation=self.activate_classify)(x) + self.model = Model(inputs=self.word_embedding.input, outputs=output) + self.model.summary(120) + + + +# 卷积的2种方式 +# # 1 github: https://github.com/ShawnyXiao/TextClassification-Keras/tree/master/model/RCNN/rcnn.py +# x = Conv1D(64, kernel_size=1, activation='tanh')(x) +# x = GlobalMaxPooling1D()(x) +# 
+# +# # 2 github : https://github.com/airalcorn2/Recurrent-Convolutional-Neural-Network-Text-Classifier/blob/master/recurrent_convolutional_keras.py +# semantic = Conv1D(hidden_dim_2, kernel_size=1, activation="tanh")() # See equation (4). +# # Keras provides its own max-pooling layers, but they cannot handle variable length input +# # (as far as I can tell). As a result, I define my own max-pooling layer here. +# pool_rnn = Lambda(lambda x: backend.max(x, axis=1), output_shape=(hidden_dim_2,))(semantic) # See equation (5). + + + diff --git a/mTextRCNN/train.py b/mTextRCNN/train.py new file mode 100644 index 0000000..0597b76 --- /dev/null +++ b/mTextRCNN/train.py @@ -0,0 +1,177 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/8 14:37 +# @author :Mo +# @function :train of RCNNGraph_kim with baidu-qa-2019 in question title + + +# 适配linux +import pathlib +import sys +import os +project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) +sys.path.append(project_path) +# 地址 +from conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters +# 训练验证数据地址 +from conf.path_config import path_train, path_valid, path_tests, path_out +# 数据预处理, 删除文件目录下文件 +from data_preprocess.text_preprocess import PreprocessTextMulti, delete_file, load_json, save_json, transform_multilabel_to_multihot, txt_write +from data_preprocess.utils import mkdir, draw_accuracy_figure +# 模型图 +from mTextRCNN.graph import RCNNGraph as Graph +# 模型评估 +from sklearn.metrics import classification_report, hamming_loss +# 计算时间 +import time + +def train(hyper_parameters=None, rate=1.0): + """ + 训练函数 + :param hyper_parameters: json, 超参数 + :param rate: 比率, 抽出rate比率语料取训练 + :return: None + """ + if not hyper_parameters: + hyper_parameters = { + 'train_name':'textRNN', + 'train_mode':'non', # non, attention + 'train_time': None, + 'path_train_out': 'None', + 'len_max': 60, # 句子最大长度, 固定 推荐20-50 + 'embed_size': 300, # 字/词向量维度 + 'vocab_size': 20000, # 
这里随便填的,会根据代码里修改 + 'trainable': True, # embedding是静态的还是动态的, 即控制可不可以微调 + 'level_type': 'char', # 级别, 最小单元, 字/词, 填 'char' or 'word' + 'embedding_type': 'random', # 级别, 嵌入类型, 还可以填'xlnet'、'random'、 'bert'、 'albert' or 'word2vec" + 'gpu_memory_fraction': 0.86, #gpu使用率 + 'model': {'label': 51, # 类别数 + 'batch_size': 256, # 批处理尺寸, 感觉原则上越大越好,尤其是样本不均衡的时候, batch_size设置影响比较大 + 'filters': [2, 3, 4, 5], # 卷积核尺寸 + 'filters_num': 300, # 卷积个数 text-cnn:300-600 + 'channel_size': 1, # CNN通道数 + 'dropout': 0.2, # 随机失活, 概率 + 'decay_step': 100, # 学习率衰减step, 每N个step衰减一次 + 'decay_rate': 0.9, # 学习率衰减系数, 乘法 + 'epochs': 1, # 训练最大轮次 + 'patience': 3, # 早停,2-3就好 + 'lr': 5e-4, # 学习率, 对训练会有比较大的影响, 如果准确率一直上不去,可以考虑调这个参数 + 'l2': 1e-9, # l2正则化 + 'activate_classify': 'softmax', # 最后一个layer, 即分类激活函数 + #'loss': 'categorical_crossentropy', # 损失函数 + 'loss': 'binary_crossentropy', # 损失函数, 可能有问题, 可以自己定义 categorical_crossentropy + 'metrics': 'accuracy', # 保存更好模型的评价标准 + 'is_training': True, # 训练后者是测试模型 + 'path_model_dir': path_model_dir, # 模型目录 + 'model_path': path_model, + # 模型地址, loss降低则保存的依据, save_best_only=True, save_weights_only=True + 'path_hyper_parameters': path_hyper_parameters, # 模型(包括embedding),超参数地址, + 'path_fineture': path_fineture, # 保存embedding trainable地址, 例如字向量、词向量、bert向量等 + 'rnn_type': 'LSTM', # rnn_type类型, 还可以填"GRU" + 'rnn_units': 256, # RNN隐藏层 + }, + 'embedding': {'layer_indexes': [12], # bert取的层数, + # 'corpus_path': '', # embedding预训练数据地址,不配则会默认取conf里边默认的地址, keras-bert可以加载谷歌版bert,百度版ernie(需转换,https://github.com/ArthurRizar/tensorflow_ernie),哈工大版bert-wwm(tf框架,https://github.com/ymcui/Chinese-BERT-wwm) + }, + 'data':{'train_data': path_train, # 训练数据 + 'val_data': path_valid, # 验证数据 + 'test_data': path_tests, # 测试数据 + }, + } + + output_path = path_out + hyper_parameters['train_name'] + '_' + hyper_parameters['train_mode'] + mkdir(output_path) + hyper_parameters['path_train_out'] = output_path + + # 删除先前存在的模型\embedding微调模型等 + delete_file(path_model_dir) + time_start = time.time() + # graph初始化 + 
graph = Graph(hyper_parameters) + print("graph init ok!") + ra_ed = graph.word_embedding + # 数据预处理 + pt = PreprocessTextMulti(path_model_dir) + x_train, y_train = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['train_data'], + ra_ed, rate=rate, shuffle=True) + x_val, y_val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], + hyper_parameters['data']['val_data'], + ra_ed, rate=rate, shuffle=True) + print("data propress ok!") + print(len(y_train)) + # 训练 + H = graph.fit(x_train, y_train, x_val, y_val) + + train_time = time.time()-time_start + hyper_parameters['train_time'] = train_time + print("耗时:" + str(train_time)) + + # 绘图 + draw_accuracy_figure(H, output_path) + +def pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0): + preout_parameters = { + 'predict_time': None, + 'predict_acc': None, + 'hamming_loss': None, + 'predict_report': 'None', + } + # 测试集的准确率 + hyper_parameters = load_json(path_hyper_parameter) + path_json = hyper_parameters['path_train_out'] + '/predict_resault.json' + time_start = time.time() + + # graph初始化 + graph = Graph(hyper_parameters) + print("graph init ok!") + graph.load_model() + print("graph load ok!") + ra_ed = graph.word_embedding + + # 数据预处理 + pt = PreprocessTextMulti(path_model_dir) + x, y = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], path_test, + ra_ed, rate, shuffle=True) + y_pred = [] + index_y = [] + pred = graph.predict(x) + + print(pred) + for i in range(len(pred)): + pre = pt.prereocess_idx(pred[i]) + label_pred = pre[0][0][0] + label_pred = pt.l2i_i2l['l2i'][label_pred] + label_multi_idex = transform_multilabel_to_multihot(label_pred, label=51) + y_pred.append(label_multi_idex) + index_y.append(y[i].tolist()) + print(pre) + print(label_multi_idex) + print(y[i].tolist()) + print('=========================') + + print("data pred ok!") + # 预测结果转为int类型 + #index_y = [pt.l2i_i2l['l2i'][i] for i in y] + #index_pred = 
[pt.l2i_i2l['l2i'][i] for i in y_pred] + target_names = [pt.l2i_i2l['i2l'][str(i)] for i in range(hyper_parameters['model'].get('label', '51'))] + print(target_names) + # 评估 + report_predict = classification_report(index_y, y_pred, digits=9, target_names=target_names) + preout_parameters['predict_report'] = report_predict + print(report_predict) + txt_write(list_line=report_predict, file_path=hyper_parameters['path_train_out'] + '/report.txt') + + h_loss = hamming_loss(index_y, y_pred) + preout_parameters['hamming_loss'] = h_loss + print("Hamming Loss = {:.6f}".format(h_loss)) + + predict_time = time.time() - time_start + preout_parameters['predict_time'] = predict_time + print("耗时:" + str(predict_time)) + + save_json(jsons=preout_parameters, json_path=path_json) + +if __name__=="__main__": + train(rate=1) + pred_tet(path_test=path_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少 \ No newline at end of file diff --git a/textCNN/predict.py b/textCNN/predict.py deleted file mode 100644 index 93b4f8a..0000000 --- a/textCNN/predict.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- coding: UTF-8 -*- -# !/usr/bin/python -# @time :2019/6/3 10:51 -# @author :Mo -# @function :pred of text-cnn with baidu-qa-2019 in question title - - -# 适配linux -import pathlib -import sys -import os -project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) -sys.path.append(project_path) -# 地址 -from keras_textclassification.conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters -# 训练验证数据地址 -from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid -# 数据预处理, 删除文件目录下文件 -from keras_textclassification.data_preprocess.text_preprocess import PreprocessText, read_and_process, load_json -# 模型图 -from keras_textclassification.m02_TextCNN.graph import TextCNNGraph as Graph -# 模型评估 -from sklearn.metrics import classification_report -# 计算时间 -import time - -import numpy as np - - -def 
pred_tet(path_hyper_parameter=path_hyper_parameters, path_test=None, rate=1.0): - # 测试集的准确率 - hyper_parameters = load_json(path_hyper_parameter) - if path_test: # 从外部引入测试数据地址 - hyper_parameters['data']['val_data'] = path_test - time_start = time.time() - # graph初始化 - graph = Graph(hyper_parameters) - print("graph init ok!") - graph.load_model() - print("graph load ok!") - ra_ed = graph.word_embedding - # 数据预处理 - pt = PreprocessText(path_model_dir) - y, x = read_and_process(hyper_parameters['data']['val_data']) - # 取该数据集的百分之几的语料测试 - len_rate = int(len(y) * rate) - x = x[1:len_rate] - y = y[1:len_rate] - y_pred = [] - count = 0 - for x_one in x: - count += 1 - ques_embed = ra_ed.sentence2idx(x_one) - - if hyper_parameters['embedding_type'] in ['bert', 'albert']: - x_val_1 = np.array([ques_embed[0]]) - x_val_2 = np.array([ques_embed[1]]) - x_val = [x_val_1, x_val_2] - elif hyper_parameters['embedding_type'] == 'xlnet': - x_val_1 = np.array([ques_embed[0]]) - x_val_2 = np.array([ques_embed[1]]) - x_val_3 = np.array([ques_embed[2]]) - x_val = [x_val_1, x_val_2, x_val_3] - else: - x_val = ques_embed - # 预测 - pred = graph.predict(x_val) - pre = pt.prereocess_idx(pred[0]) - label_pred = pre[0][0][0] - if count % 1000==0: - print(label_pred) - y_pred.append(label_pred) - - print("data pred ok!") - # 预测结果转为int类型 - index_y = [pt.l2i_i2l['l2i'][i] for i in y] - index_pred = [pt.l2i_i2l['l2i'][i] for i in y_pred] - target_names = [pt.l2i_i2l['i2l'][str(i)] for i in list(set((index_pred + index_y)))] - # 评估 - report_predict = classification_report(index_y, index_pred, - target_names=target_names, digits=9) - print(report_predict) - print("耗时:" + str(time.time() - time_start)) - - -def pred_input(path_hyper_parameter=path_hyper_parameters): - # 输入预测 - # 加载超参数 - hyper_parameters = load_json(path_hyper_parameter) - pt = PreprocessText(path_model_dir) - # 模式初始化和加载 - graph = Graph(hyper_parameters) - graph.load_model() - ra_ed = graph.word_embedding - ques = '我要打王者荣耀' - # str to 
token - ques_embed = ra_ed.sentence2idx(ques) - if hyper_parameters['embedding_type'] in ['bert', 'albert']: - x_val_1 = np.array([ques_embed[0]]) - x_val_2 = np.array([ques_embed[1]]) - x_val = [x_val_1, x_val_2] - else: - x_val = ques_embed - # 预测 - pred = graph.predict(x_val) - # 取id to label and pred - pre = pt.prereocess_idx(pred[0]) - print(pre) - while True: - print("请输入: ") - ques = input() - ques_embed = ra_ed.sentence2idx(ques) - print(ques_embed) - if hyper_parameters['embedding_type'] in ['bert', 'albert']: - x_val_1 = np.array([ques_embed[0]]) - x_val_2 = np.array([ques_embed[1]]) - x_val = [x_val_1, x_val_2] - else: - x_val = ques_embed - pred = graph.predict(x_val) - pre = pt.prereocess_idx(pred[0]) - print(pre) - - -if __name__=="__main__": - # 测试集预测 - pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少 - - # 可输入 input 预测 - pred_input() diff --git a/textCNN/train.py b/textCNN/train.py deleted file mode 100644 index 480a2ea..0000000 --- a/textCNN/train.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: UTF-8 -*- -# !/usr/bin/python -# @time :2019/6/3 10:51 -# @author :Mo -# @function :train of TextCNN with baidu-qa-2019 in question title - - -# 适配linux -import pathlib -import sys -import os -project_path = str(pathlib.Path(os.path.abspath(__file__)).parent.parent.parent) -sys.path.append(project_path) -# 地址 -from keras_textclassification.conf.path_config import path_model, path_fineture, path_model_dir, path_hyper_parameters -# 训练验证数据地址 -from keras_textclassification.conf.path_config import path_baidu_qa_2019_train, path_baidu_qa_2019_valid -# 数据预处理, 删除文件目录下文件 -from keras_textclassification.data_preprocess.text_preprocess import PreprocessText, delete_file -# 模型图 -from keras_textclassification.m02_TextCNN.graph import TextCNNGraph as Graph -# 计算时间 -import time - - -def train(hyper_parameters=None, rate=1.0): - if not hyper_parameters: - hyper_parameters = { - 'len_max': 50, # 句子最大长度, 固定推荐20-50, bert越长会越慢, 占用空间也会变大, 
本地win10-4G设为20就好, 过大小心OOM - 'embed_size': 300, # 字/词向量维度, bert取768, word取300, char可以更小些 - 'vocab_size': 20000, # 这里随便填的,会根据代码里修改 - 'trainable': True, # embedding是静态的还是动态的, 即控制可不可以微调 - 'level_type': 'char', # 级别, 最小单元, 字/词, 填 'char' or 'word', 注意:word2vec模式下训练语料要首先切好 - 'embedding_type': 'random', # 级别, 嵌入类型, 还可以填'xlnet'、'random'、 'bert'、 'albert' or 'word2vec" - 'gpu_memory_fraction': 0.76, #gpu使用率 - 'model': {'label': 17, # 类别数 - 'batch_size': 256, # 批处理尺寸, 感觉原则上越大越好,尤其是样本不均衡的时候, batch_size设置影响比较大 - 'dropout': 0.5, # 随机失活, 概率 - 'decay_step': 100, # 学习率衰减step, 每N个step衰减一次 - 'decay_rate': 0.9, # 学习率衰减系数, 乘法 - 'epochs': 20, # 训练最大轮次 - 'patience': 3, # 早停,2-3就好 - 'lr': 5e-3, # 学习率,bert取5e-5,其他取1e-3, 对训练会有比较大的影响, 如果准确率一直上不去,可以考虑调这个参数 - 'l2': 1e-9, # l2正则化 - 'activate_classify': 'softmax', # 最后一个layer, 即分类激活函数 - 'loss': 'categorical_crossentropy', # 损失函数 - 'metrics': 'accuracy', # 保存更好模型的评价标准 - 'is_training': True, # 训练后者是测试模型 - 'path_model_dir': path_model_dir, # 模型目录 - 'model_path': path_model, - # 模型地址, loss降低则保存的依据, save_best_only=True, save_weights_only=True - 'path_hyper_parameters': path_hyper_parameters, # 模型(包括embedding),超参数地址, - 'path_fineture': path_fineture, # 保存embedding trainable地址, 例如字向量、词向量、bert向量等 - }, - 'embedding': {'layer_indexes': [1, 2, 3, 12, 13], # bert取的层数,1为embedding层,未处理 - # 'corpus_path': 'Y:/BaiduNetdiskDownload/DataSet/bert-model/chinese_bert_chinese_wwm_L-12_H-768_A-12', # embedding预训练数据地址,不配则会默认取conf里边默认的地址 - # 'corpus_path':'Y:/BaiduNetdiskDownload/DataSet/bert-model/baidu_ernie', - # keras - bert可以加载谷歌版bert, 百度版ernie(需转换,https: // github.com / ArthurRizar / tensorflow_ernie), 哈工大版bert - wwm(tf框架,https: // github.com / ymcui / Chinese - BERT - wwm) - }, - 'data':{'train_data': path_baidu_qa_2019_train, # 训练数据 - 'val_data': path_baidu_qa_2019_valid # 验证数据 - }, - } - - # 删除先前存在的模型和embedding微调模型等 - delete_file(path_model_dir) - time_start = time.time() - # graph初始化 - graph = Graph(hyper_parameters) - print("graph init ok!") - ra_ed = 
graph.word_embedding - # 数据预处理 - pt = PreprocessText(path_model_dir) - x_train, y_train = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], - hyper_parameters['data']['train_data'], - ra_ed, rate=rate, shuffle=True) - x_val, y_val = pt.preprocess_label_ques_to_idx(hyper_parameters['embedding_type'], - hyper_parameters['data']['val_data'], - ra_ed, rate=rate, shuffle=True) - print("data propress ok!") - print(len(y_train)) - # 训练 graph.fit(x_train, y_train, x_val, y_val) - print("耗时:" + str(time.time()-time_start)) - - -if __name__=="__main__": - train(rate=1) - - From a32bd3909904a27583b93e93b5f7ac66c450a405 Mon Sep 17 00:00:00 2001 From: atom-zh Date: Sat, 14 Aug 2021 01:05:03 +0800 Subject: [PATCH 5/5] add dataset --- dataset/category2labels.json | 73 +++++++++++++++++++++++ dataset/l2i_i2l.json | 108 +++++++++++++++++++++++++++++++++++ dataset/sample.xlsx | Bin 0 -> 10228 bytes 3 files changed, 181 insertions(+) create mode 100644 dataset/category2labels.json create mode 100644 dataset/l2i_i2l.json create mode 100644 dataset/sample.xlsx diff --git a/dataset/category2labels.json b/dataset/category2labels.json new file mode 100644 index 0000000..1c882ad --- /dev/null +++ b/dataset/category2labels.json @@ -0,0 +1,73 @@ +{ + "A": [ + "主题活动", + "党性学习", + "十九大", + "廉政教育", + "政策实践", + "相关会议", + "重要讲话" + ], + "B": [ + "听取工作汇报", + "换届选举", + "提升基层工作", + "科普人才队伍建设", + "群团工作会议" + ], + "C": [ + "助推新农村文化建设", + "城区精神文明共建", + "实践科技与科普服务平台", + "少数民族村的保护与发展", + "展览献爱心", + "文明单位考评", + "道德讲堂" + ], + "D":[ + "优秀人才评选", + "先进示范", + "双亮双比活动", + "授予荣誉称号", + "自制教具评选", + "表彰", + "评审" + ], + "E":[ + "三长制", + "创业青年座谈会", + "征求意见和建议", + "最美科技工作者", + "科技服务", + "科技者活动日", + "竞赛" + ], + "F":[ + "创新", + "助农", + "发展战略", + "对接发展", + "科技服务平台", + "科技节", + "经济发展新模式" + ], + "G":[ + "三下乡", + "科学素质小组会议", + "科技培训", + "科普服务", + "科普活动", + "科学普及" + ], + "H":[ + "座谈会", + "服务农业", + "服务群众", + "社区共建", + "走访调研调查", + "百汇联百村" + ], + "I":[ + "学会活动" + ] +} diff --git a/dataset/l2i_i2l.json 
b/dataset/l2i_i2l.json new file mode 100644 index 0000000..ca556a2 --- /dev/null +++ b/dataset/l2i_i2l.json @@ -0,0 +1,108 @@ +{ + "i2l": { + "0": "服务农业", + "1": "群团工作会议", + "2": "最美科技工作者", + "3": "文明单位考评", + "4": "社区共建", + "5": "十九大", + "6": "城区精神文明共建", + "7": "重要讲话", + "8": "创新", + "9": "创业青年座谈会", + "10": "展览献爱心", + "11": "科学素质小组会议", + "12": "对接发展", + "13": "党性学习", + "14": "三下乡", + "15": "提升基层工作", + "16": "科普活动", + "17": "发展战略", + "18": "听取工作汇报", + "19": "换届选举", + "20": "助推新农村文化建设", + "21": "征求意见和建议", + "22": "科技培训", + "23": "经济发展新模式", + "24": "自制教具评选", + "25": "优秀人才评选", + "26": "少数民族村的保护与发展", + "27": "科技服务平台", + "28": "授予荣誉称号", + "29": "竞赛", + "30": "科技服务", + "31": "座谈会", + "32": "科学普及", + "33": "双亮双比活动", + "34": "主题活动", + "35": "实践科技与科普服务平台", + "36": "政策实践", + "37": "科技者活动日", + "38": "科技节", + "39": "科普人才队伍建设", + "40": "道德讲堂", + "41": "科普服务", + "42": "廉政教育", + "43": "三长制", + "44": "助农", + "45": "服务群众", + "46": "评审", + "47": "先进示范", + "48": "相关会议", + "49": "走访调研调查", + "50": "表彰" + }, + "l2i": { + "三下乡": 14, + "三长制": 43, + "主题活动": 34, + "优秀人才评选": 25, + "先进示范": 47, + "党性学习": 13, + "创业青年座谈会": 9, + "创新": 8, + "助农": 44, + "助推新农村文化建设": 20, + "十九大": 5, + "双亮双比活动": 33, + "发展战略": 17, + "听取工作汇报": 18, + "城区精神文明共建": 6, + "实践科技与科普服务平台": 35, + "对接发展": 12, + "少数民族村的保护与发展": 26, + "展览献爱心": 10, + "座谈会": 31, + "廉政教育": 42, + "征求意见和建议": 21, + "换届选举": 19, + "授予荣誉称号": 28, + "提升基层工作": 15, + "政策实践": 36, + "文明单位考评": 3, + "最美科技工作者": 2, + "服务农业": 0, + "服务群众": 45, + "相关会议": 48, + "社区共建": 4, + "科学普及": 32, + "科学素质小组会议": 11, + "科技培训": 22, + "科技服务": 30, + "科技服务平台": 27, + "科技者活动日": 37, + "科技节": 38, + "科普人才队伍建设": 39, + "科普服务": 41, + "科普活动": 16, + "竞赛": 29, + "经济发展新模式": 23, + "群团工作会议": 1, + "自制教具评选": 24, + "表彰": 50, + "评审": 46, + "走访调研调查": 49, + "道德讲堂": 40, + "重要讲话": 7 + } +} \ No newline at end of file diff --git a/dataset/sample.xlsx b/dataset/sample.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4d33f8f5071c834befde348fcd20e51d1651db6d GIT binary patch literal 10228 
zcma)iWmp{B(lzc7WN--X65KVo27+sX3<>U<;O-%~TW}5T?h+t_ySuyd4JYqCk}J=* zf6VmLU8`z$Rqxf+yGlt88U_jCc?2r1iah`SYaoITjIE87?5u6!hTrW#1a=yl7iVL_DCJr8;l3 zW?Do9Eh4P~ow+$_o(_To^W~MxN(po%0aP$H^}0j&wn-Rlb%11mZIyxp{T^Yk?Ky)< zkt_qx_uX7bUyCGQAPN^kuAkj{3$m$MO7->9&!2@6uXLGfnW2z9_H|2QyIgYW=yBH1 zR(94N{VP}ag`sB_^rR1bmzjQOD}5A1yyemt%30Tj+a|D|l`YF@pPujjIl4Csd%pT` zb8>LO%)G*QyHgV1xvjJZ&AJ1vybsst(}fYP*OAjh(BY~k9q9@2CZJ7FtV4k(loKL1 zRjlQ4wQfg)`|if-1oofaEj+_z&;WZE2=*?<-@R*OZ3ldIulrX8#ZDH0-<9+{*{RU? zg=3Xs%2)_T2`b7E+I_~m>TqeIK_}BsNR#tEST=+>^H4OVV)W==xi5$AIL_9MtLQ?P zVZ(+QV#x_G0A84}Llfi@S0Gy+ac@Um8GTixw^gvAk~1gD8Bv`;#GDFL=L{6-=V|d0Sm*u?37VG&+gEaRwC* z2s##kl8+q@C}@?W3|eJY7@i}8RgPRul||MrTlL2lHr>^B zvL_=k;TbQBeXr@zcMU2l%`=KYMQBE#Lp2r1$qTy{OWTEjKXdH6%$)DOj_1iB!{_R< zmsHf0|I{T&YHmm35w=hG&nUFy`mwd4f+{gJkt^Mr>cZjAl8bm32kDf03CixR!E)Xh ze{rCQ7b(=x&C;Uz0m%moDx3D48-+rkmq%>LAfOcOsVcaP>&oKF=rb`W2Jx-jc=WjcU9U4#~4y|1-ltgWc+4@dvm3X92Wd0?yWU=Jp?fK!@LX zNQq$Hatj8`LAVeQc>gB*2ZZ%GP~iz<_N!dzflK_Lr}nk0PCQIZwzgr|<0|%pA%#y< zLj002Z!AJ*Wu+hQN-2%99}t%opdnAbH>PXEIck6Qcon*=Fs{H{1jLK%F(56JKRn%a z#rJrM``qxtS2AaUnaP+cZYA(z$4JpTPpuT=ho3}#N{|xyNkA8+-D`9S1t)aiY}mT8 z>iMw#B(QMX=z)l{SNj|PS$0Jb_1Xy|J*WxE(l7A#gJIu;&`6i0Nm)*Q4C>oZay3;? 
z`@YCw-r2juyw!ue{NdQ=T$kawA5 zmG-_}LrBAgDAVQOvE2O^`5rQNgD;+Twr0$W@bvDtIskGNLn(VIlyf4Nucs2et)Nj& zcy2GWL=$>XEOTYRpp76|)6IzS*b$w?_(t7l&mZv#u4mO~PKN0%IA%Yjug+&<4wFxfpsRqJ6-pE~``|h91rgOG85z$xkt@X%Es~%@LIe(T z=&>cP(P0OU`s}(?udS`9W~fcKJ-Tx3@l(L)LFOkr`HNvf;h}uz#FzJ3Qp3&;dV~b- zI^nougq(Ed!BUil@tPE@w!z_4`G|UP1#%XwJ>hbc((_K@;t{E|)x8vjQh^dt%URMk z%5u6PKN`q~VV4Y1m_xd-m1Wm3CbB!9uTa+IQ*Br8Z4+a4e8G`rd zl9)@vCz-rmfoUb7f>Z|uvywI#eg~KH!|g6CNh5eldQtCzrBUzItQc47D$*=j5hyUh zdex3d##AHGo`F%zxp6k~ck~CTA)O}Evk5Np7u)4@nDYQcD>KC27-1j_0=Gm~ ztM-rm@EuRYmul<+`@5ob#qcXNOsm_cAHr!+t}aMVVi$Ydio8M7C{Jy8+;s255b$)m zST|n~zhT9y(WMc}ebsAOvFP9uWBaP(t3XfCIq#5V?Imf3MYb;e8@$R3e}f6iXCbv* zVBuy% zWhC5z@&v0pj%V(Ly*{TX+2#2tVdS~DhUJULeR!(KO-un9iQbgW1Mi3=SmF=i7wyaX zvW|};G(MM&T`0TJeTR&nE`~{NV5W8W2($$LmdnW#$BoXxLO_t>|9a2;DfWl( zAM|;krC?3SjoysA?Wwun9A&&m!A1^WBeY8Ggefi?LR6q<7LVavPfEw%fZhLH#5>D$ zOwN1Jjdm7J+DrfEyEQI}WT+ctVml3nw^?SrePm3A?6hetCFeW$vg5kVrC zc%Vv8?qxv9{2e9l*rLP5hw}4=GS&Jv%sq8gg<1(010s3BQ4TF+Z7nqx=6g@)m6|v0 z&h-8S2(%>hc`n3+isndUn^|)8&S4*W1My!wv30!2IZ~#i6xhC2d)U%&DpMU*G!Wl_ z)9JLzctUFIAWgB_;dSL;rYV^syx7#=c?X@Z+k2t&VA85}3xWW^l>bZ#-KqS(w0Ojc zR17&F%=h_9yZIo=pZngr%y!W{-xIk^wEpRqz=LJ`HGruJZsWR#phe%>(s2(*-`DQ5 zJ?c{7;l(Z!Sc;Wu#{pH(lB2dUbRzBxVm7>@OQSMLRSnXX96(h}OV2C>QEnqwTW%D( z2OdI6B!p5%kay%523Dz6PrN??>*$O4cU4dZaah>ajS}f(z<5|10#CnjZ)+OaG=^MR z*}MDe+iv!wi_pBh$h35!+l}*+HIS6>RtsY%bc|%~O3_j4_1)Z?=GHX#tKEJLt0tC{ z)~E9s>sF5)xDFw0Hn*D#kZ_u?`_1r{=i_k~Vl3f)dtk+I;hlA5lizyVl9S27IE>}F#LC7wamu;(jVK?%ay+hoqT~IGsC9m+E z;g$oE27``2#z`Lb-wbsvh;C1FI^*W3o848SR7Kb6IvH7vH7O|5EBmQ13SNXMusRhV z3X!SnMoPCOzQ@{Gpji|m|M4ZPZ<_vF&KRT&=_`9&QR!;z z##{|4>M2#?N5r}kg7?3ha!bl=cA&`+Bu__n17rcNwTTYxkqOOWZZ~G;+Az(F& zZ(`f^W0Ps=++d9MxhidlCLV`&{8MP+GYj|9vuyEy$^?=pxbAl$$T9WO_9QUx7qaO30FQERkVW zQmLlkv|Ok9M1LMouJyyf|CDcJ%_2|rg!Lb(?zmA+en(WyID_(2Fsu{HTK>a5nh&OV zl^MMk8OCu)UYry=<*|ANpaS?%!jb#hX?V(~A>h9jE-Q1=Pl|aO5udUU}=>7^t9s$*V`y|ar+r|CN*U}P>JCrN+?%3=3pqNtM3GP z*C_KaTDsg-;d%|OcCDQvZ6gd!Q&@WTURpL8-gsQJ);M6z5nnw?FKo8i=zAM`erlc! 
zUg45%onElc3`Gj4ec7K+c)CQL2}J>COP1L5q0|2c8JbWS;$*wMaUMUG92Ws~_5uE% z-#ni$Sx{-fsd&611O)1@SBAZVs|C>hIfnZ z%>#1k0#$uuLyq_yxx@`b=r=O*8p z`|Q0*Nx8U={Jo3D(xVy6n=Z5shPkE3Np-x`y31;@xd_US9msR11^{Bc(ut16Ou_wt zjzBM9903zv={eLxYa<4X38k* z#qCZBy*O!e;?@_r^I!C%nca_mQUO(zt)$DcLFraQy4QyzQXc2Ga5r^!uuGQ@gkN># zR4We5D-W=~Ke?ZI5q0g_P~f(NsD-vtI`^6kad-565MQ;ABTihh&t-fqp2`{EPwG#| zf1cD;J;bLK`pNl|`SPc#Xiiu+e7%`Pq@_43C%xtL5Vzc|Mh{X65sGc#=k7f;OS|6d zeT|W{`A%nICtvhwDHum+z0Ud2BO<;ul8jK!ED9YJoARYNCgkmWdZe$K`DjZQvIE6M z!waji&FBGT%c{j63YHJD9|6bYhb49pVX-@LR-A-X%ANZhyWJ`jmqYusZFfMonv5m% zn1*V3|Lhxa$R|LNyg!rDG(8nywH#!hS-?ytkwsarYi4I*N+wme-)cOSa-(Xhz5NO| zi8q>ROTYQ!6^|ue19|&;6)Uv!*9@lf7xhiWD;#nwc2c~$%zjFVJf+Dv-x?y!uPhWBO>&Z%aW^>O>AU?tspD(|xT} z!EesOj~yqsm)}ug?V+c{IeV6ydWLsHFsugNu4mlp&R8Vn&b%coQ6J=Rz-FM=7nUq? z;Z*XWvD#fWsnoZgP*Y)7tt3hyRP`i2Pj(x$_~=#h_UjYNhTQ$TsK>X{Lv6au$gdCiMmwdmlRNsAl5(9~lNb zOfU6L;J)UAJ%>FGR8BIYS|E!w%E2+Cu)Y(3YV2+s%5ZnDe#H@G- z&YlcG7~07I#uSmemx>s5cguQAn}<=YJraZnq7X0m1D3+wD``qfk~}>!Ohl0cKNxsj zmz^mb!*X`pC%t|ah0(V|K@l}2Wg`MmK-39vTEa&KB}f~k`DwP;xX56ehMQ)<^Gbr0 zk|g!a(TC0&;;+dfECaB7-s{rZe<#EZLEy8?l-Gafnu0Y_PuX>`WT;a&x0Q&Irz5>6 zIOkZF`T2HDG^rgmrXv!5=rvdIN_Mwml}QBJiYV~2^iNLSnZ@0PAKu(mf|{?ZefxGY zVW(fF%oJegq@JrIhWk5)dK*3kq%BIubRU!#`|+i3 z^|leTwW21nO1Fcbei=1!=C;BP!iowRML$l4fSNF|d~%AcxOzZ{!7xfU@KCZ-euKB5RLt2tme@SuM5)9A*oRkO z->DG$m6veA9G&}E(m=vDdFQ}UT159jkLooWPnKIrQeq10+oIwq`H!7@gyvrB18ccG zr7h9bE>8H5IqEApYwd%~y<(cw_8%M)PJ(z%)8!2k@M8v;ib!s~&$xa6v$JMNZ8|uW?%#ppQAsMO`)pNOkGl?g~C#Lt*kZa@!@jaId4tsYDDWPnO$YkR6?YAJeSZDx1;4u1gO!87ltc@%(<@gT6xrUi}9wdGsqQ zg~#jnpRa1y_-kp>-BSln_aS{@D-yK`{MHzU*$bJ&h(wd}QinlE`tCT>q7?%YBrZ;n zK2S6L!iqGtkIB=aA6iicIuP)LUA``kL;{aL%4e+JPrgbM^oR*L2K@8`42a`2#E>FO zm)C>P&2UJQPf0wyy)7ZA;h{k62N}c1l+X~c!^RPgRrqPv$E2#G^0}OJdJbvU>l)N1 zzI-5p*&&p@E{+71)VBA-KFB7B^BCufNo4O~fP`KT{yuPQCtw}-etMNYCVw2J_@`R? 
zn7|xp92Uc^MSb*gnj~lyRt?sf40j#2q#|P^?)5AlpSxLDWQ}EkaZ;54(zmd=}yRx#; zJTWBKVbT9{fXDZP)9+@w*Aq3FAmsZ|67gg~m%UyR6!6n^;ygE$lz4AP$YICpuu@9u z#KrYU&^#HJh-D~5ZjU-d{bWRAW^fy7H}~{gMwzqH#s3Ms&kKzJ0r6Zv0`D%^e>AWI z8mlbYVzU5SZIp^|Kx-TG8m z;1xwac(~hYh5MZ+ZWk7=^q*~OQyMRC%_6t9G~@=vodeZ4$r&DRW@e-wrD==lTDrFz zOh}IePD{;K;`eq($WNCuF;TWx-nSw6Bzi3>v{}PIdvLRb9Uh$y7ZP=&mt7i^37>dX z3!ibLZoLoG6qJ4ma`Epm+h;)|R4Z?-FpTA{brl{Fx+DdHoZ!DYF1eEcEP0lUQ(Lt@ zNCiBqp_Oan{7~G9LQMSjc@vRnqJ6O7W~M%Ux#59H-OGjelsm{9;=KHp=@RlXIp4=Y zNR(U5-jU^vbS>pKcU*zEe#|KW+jcY4Am^ai?0!5Zj!c)~VfHxQXl30~V|k*%5BiW@ zvZJA|I{N0IC@->q(vSyh*SkVaGOsgqxj}DYw^|qw3Sjc$!yUkzgFtKeNDFu=Bq(>h z<;o7R?n0aKHzzHE(I?$N-rXn+2AMfQ#+MZ6Z-PuQx;Wx<{K>iOG$%bP{lbYx4!lp- zd6~w^Qppxl5)|$9a!D$_HF^t*y=fCLvEJ*dF(nuL?)s7lsS7S#R!sSI>KC{Zu2#PG z$83)Jw`TnwE}8A4}xmWuUO5BNTpd`ZkW$9 z-jxP(GdagESIq3aC_{~CD?OQH=A-g`q=u(nJl}4fV#vksJsKMlBjPe_HiD15&C%5EXpTQ-sIB?19#b4zVLu+gEKdLBS72b5R zpadLA-v^>Q0UtEEd#Z3u4Y74cyS6ZZ;f<1V(e(Y>*H=7DVTiWNZ+j!|uiaCBlwDO* z9=$IfrOz2-`39q!S+A5FBP6_mt0G%$6vAGB{p}o@vH6GMo6Qogx#~$B9|)j|Ht*Ds zPa6*Q_z;>SG?KNfrP(G6XL1`C99}K9R}m0BaD_5IQK^b%Xvh< zsx<|H`*4jIsl750Q6Pc;rK8^w7g~-|TQk2O5%67pZL{jdF%#}j?|OrXl0;bRhW8bO zYbW_tm?^%VZb=uQ5x5)Dq!k*pt#*v7GO|!(4n4~FXtAT4PB!7Y2`3cM0-jb){8h0n z7MY%ty2{H3IbiyK-`a%(2kM<2(Bk*n*e4#RrYG>VC2%m2|CC{VmMr-?BHP7+-gD%0 zgudCL;EatiprIz<>SvTnhQSC#my7YuBtX2FpayXhIf9xn*uSj%#=}jpWlwM1@hBHn~v?M zDv6}?b4uj#3L8=Qay45iSq+>@_N2d5QV{4~q!0Sk2xrUV-*jVbUAXsSSuinMY=oU* zT{SAz(c%ksnD6K8=~na$2{<_35j#hr!6Rqcz~T}UCR7Ps{{)qg`3>DxTgJp*T}6@* zs*K?q5fEsrqdjU&r)p4|&yOl=?}?JFw`)DlRfzMQwB0FFk@J7|<^OGy|0j2D9B1Jj zfZf;tb|(S&AN(TwCH7pL{MQQkmv_y;%cSSXx7l~h-S%fVt7NKvPsUWqCkP`3&yR-i zayrSi>3x}684mHfDCvH!M>K>Z6#pzW9Sr)DFyYP6-SyF=FFSOkS|g>7NpeizCfu8BT4oDKOP(RYEa zy4V4|iu(4s3Fjr(!$~KM{2jdRU)}gZbx^vKQ1TKY#z{|yZ0VvbK&FW!*_7g`gGrnBFSJLLM1-FI8prWLK9NaEoY(F zgd{rtGz~k!Po={Jo>K?9V(j?Vj71B+&8LufvRH;rz$T&slA-(*l4w}oy^4H{<%bKp z9kWTB^`~Zj=4QF&w~5-QTtouxs*LjVX&UFB)+l{M)dj3@YKoJB(Zk}%qf}ZPcD>$O zA!4g-a!zl1aU1)dWcxX7I=9svhGzw9i?ZUdHY3S#)t%ZAxr-%!YiH1157=0U!(611r%Lb3 
z_o9o^T0nf|oJhMo%qI$MzKEKHdt4B~5foK)DDc{{ZncNd+JreC?7hjmI{Q)HSw(j- zfOPO{2_HVrQyPn@u`@i%kg9-9K zCHlW>_pPSGB|Mxxwb_E#vuTtkfpOD{+oqt(8`#bEf(&z6}{yjzd zOTi=YA9nxlw*NKh=hl{A<2k~