-
Notifications
You must be signed in to change notification settings - Fork 1
/
telegram_message_download_script.html
1681 lines (1259 loc) · 383 KB
/
telegram_message_download_script.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<!-- saved from url=(0065)https://chat.openai.com/chat/bcdd4e8b-395c-4d1b-be30-a272a95ee6f2 -->
<html data-kantu="1" class="dark" style="color-scheme: dark;"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no" class="jsx-74ac21e17e774b8"><title>telegram_message_download Script</title><meta name="next-head-count" content="4"><link rel="apple-touch-icon" sizes="180x180" href="https://chat.openai.com/apple-touch-icon.png"><link rel="icon" type="image/png" sizes="32x32" href="https://chat.openai.com/favicon-32x32.png"><link rel="icon" type="image/png" sizes="16x16" href="https://chat.openai.com/favicon-16x16.png"><link rel="preload" href="https://chat.openai.com/fonts/Signifier-Regular.otf" as="font" crossorigin=""><link rel="preload" href="https://chat.openai.com/fonts/Sohne-Buch.otf" as="font" crossorigin=""><link rel="preload" href="https://chat.openai.com/fonts/Sohne-Halbfett.otf" as="font" crossorigin=""><link rel="preload" href="https://chat.openai.com/fonts/SohneMono-Buch.otf" as="font" crossorigin=""><link rel="preload" href="https://chat.openai.com/fonts/SohneMono-Halbfett.otf" as="font" crossorigin=""><meta name="description" content="A conversational AI system that listens, learns, and challenges"><meta property="og:title" content="ChatGPT"><meta property="og:image" content="https://openai.com/content/images/2022/11/ChatGPT.jpg"><meta property="og:description" content="A conversational AI system that listens, learns, and challenges"><meta property="og:url" content="https://chat.openai.com"><link rel="preload" href="./telegram_message_download Script_files/a2ef30df7da3c920.css" as="style"><link rel="stylesheet" href="./telegram_message_download Script_files/a2ef30df7da3c920.css" data-n-g=""><noscript data-n-css=""></noscript><script defer="" nomodule="" src="./telegram_message_download Script_files/polyfills-c67a75d1b6f99dc8.js"></script><script src="./telegram_message_download Script_files/webpack-5fe30ad682a9f430.js" defer=""></script><script 
src="./telegram_message_download Script_files/framework-7a789ee31d2a7534.js" defer=""></script><script src="./telegram_message_download Script_files/main-149b337e061b4d04.js" defer=""></script><script src="./telegram_message_download Script_files/_app-33b85438dfa97ca0.js" defer=""></script><script src="./telegram_message_download Script_files/dde4e452-3591ea0c0fd388f8.js" defer=""></script><script src="./telegram_message_download Script_files/1f110208-44a6f43ddc5e9011.js" defer=""></script><script src="./telegram_message_download Script_files/2802bd5f-f554e76af2ab5bad.js" defer=""></script><script src="./telegram_message_download Script_files/68a27ff6-21204d8ed6fd05d7.js" defer=""></script><script src="./telegram_message_download Script_files/979-abe4a1d02a70bfd7.js" defer=""></script><script src="./telegram_message_download Script_files/424-7f7835d330adbaab.js" defer=""></script><script src="./telegram_message_download Script_files/762-02e501a2a689a81c.js" defer=""></script><script src="./telegram_message_download Script_files/620-32a8f72e9b47aba6.js" defer=""></script><script src="./telegram_message_download Script_files/686-77022f9e9ce68475.js" defer=""></script><script src="./telegram_message_download Script_files/814-7c8680720e417e5a.js" defer=""></script><script src="./telegram_message_download Script_files/858-d8a5b4a93f3054c9.js" defer=""></script><script src="./telegram_message_download Script_files/818-69f41a188b25db66.js" defer=""></script><script src="./telegram_message_download Script_files/[[...chatId]]-07b61bc76705b3ad.js" defer=""></script><script src="./telegram_message_download Script_files/_buildManifest.js" defer=""></script><script src="./telegram_message_download Script_files/_ssgManifest.js" defer=""></script><style data-styled="" data-styled-version="5.3.5"></style><meta name="react-scroll-to-bottom:version" content="4.2.0"><style type="text/css">
@font-face {
font-weight: 400;
font-style: normal;
font-family: 'Circular-Loom';
src: url('chrome-extension://liecbddmkiiihnedobmlmillhodjkdmb/fonts/CircularXXWeb-Book.woff2') format('woff2');
}
@font-face {
font-weight: 500;
font-style: normal;
font-family: 'Circular-Loom';
src: url('chrome-extension://liecbddmkiiihnedobmlmillhodjkdmb/fonts/CircularXXWeb-Medium.woff2') format('woff2');
}
@font-face {
font-weight: 700;
font-style: normal;
font-family: 'Circular-Loom';
src: url('chrome-extension://liecbddmkiiihnedobmlmillhodjkdmb/fonts/CircularXXWeb-Bold.woff2') format('woff2');
}
@font-face {
font-weight: 900;
font-style: normal;
font-family: 'Circular-Loom';
src: url('chrome-extension://liecbddmkiiihnedobmlmillhodjkdmb/fonts/CircularXXWeb-Black.woff2') format('woff2');
}</style><style data-emotion="react-scroll-to-bottom--css-ilqgk" data-s=""></style></head><body><div id="__next"><script>!function(){try{var d=document.documentElement,c=d.classList;c.remove('light','dark');var e=localStorage.getItem('theme');if('system'===e||(!e&&true)){var t='(prefers-color-scheme: dark)',m=window.matchMedia(t);if(m.media!==t||m.matches){d.style.colorScheme = 'dark';c.add('dark')}else{d.style.colorScheme = 'light';c.add('light')}}else if(e){c.add(e|| '')}if(e==='light'||e==='dark')d.style.colorScheme=e}catch(e){}}()</script><div class="overflow-hidden w-full h-full relative"><div class="flex h-full flex-1 flex-col md:pl-[260px]"><main class="relative h-full w-full transition-width flex flex-col overflow-hidden items-stretch flex-1"><div class="flex-1 overflow-hidden"><div class="h-full overflow-y-auto"><div class="flex flex-col items-center text-sm h-full dark:bg-gray-800"><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group dark:bg-gray-800"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative flex"><span style="box-sizing: border-box; display: inline-block; overflow: hidden; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; position: relative; max-width: 100%;"><span style="box-sizing: border-box; display: block; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; max-width: 100%;"><img alt="" aria-hidden="true" src="data:image/svg+xml,%3csvg%20xmlns=%27http://www.w3.org/2000/svg%27%20version=%271.1%27%20width=%2730%27%20height=%2730%27/%3e" style="display: block; max-width: 100%; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px;"></span><img alt="Hemang Joshi" 
srcset="/_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=32&q=75 1x, /_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=64&q=75 2x" src="./telegram_message_download Script_files/AEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA=s96-c.jpeg" decoding="async" data-nimg="intrinsic" class="rounded-sm" style="position: absolute; inset: 0px; box-sizing: border-box; padding: 0px; border: none; margin: auto; display: block; width: 0px; height: 0px; min-width: 100%; max-width: 100%; min-height: 100%; max-height: 100%;"></span></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap">```
###Here we are importing telethon library for downloading historical messages from telegram.
from telethon.tl.functions.channels import JoinChannelRequest
from telethon.sync import TelegramClient
import csv
import time
import re
import nest_asyncio
nest_asyncio.apply()
###Here api_id, api_hash and phone are user variables that has authentication login details.
api_id = 5903653
api_hash = '1343a1dd69d2d8fdf4b76b0f03be0e6a'
phone = '+917016525813'
client = TelegramClient(phone, api_id, api_hash)
###This python code helps to authenticate local client to telegram API.
await client.connect()
if not await client.is_user_authorized():
await client.send_code_request(phone)
await client.sign_in(phone, input('Enter the code: '))
else :
print('Autorised')
# ## Join telegram channel
# In[ ]:
###Here we are joining telegram channel by the bot to download the historical messages.
# await client(JoinChannelRequest('RatnakarSecuritiesPvtLtd'))
###Here channelURL string variable holds URL value achieved from telegram channel.
channelURL = 'https://t.me/INTRADAY_001'
await client(JoinChannelRequest(channelURL.split('https://t.me/')[1]))
# ## SAVE LAST N Messages From Ratnakar to txt file
# In[ ]:
###This function helps to download telegram channel messages to a file.
from telethon import TelegramClient, utils
from telethon.tl.types import InputMessagesFilterPhotos, InputMessagesFilterDocument
###Here num_of_msg_to_save integer variable determines how many total last messages we want to download.
num_of_msg_to_save = 100000
###Here the main function iteratively downloads the messages and saves them to a text file in a predefined format, so that the message text and other characteristics of each message (such as its time) can be separated during further processing.
async def main():
    """Download the latest channel messages and write them to ./telegram/messages3.text.

    Uses the module-level ``client``, ``channelURL`` and ``num_of_msg_to_save``.
    Every message is written as one record made of the message date, the marker
    '~~~:::=== ', the sender display name and message text inside '[' ... ' ]', and the
    terminator ' ===:::=== ' followed by a newline.
    """
    # print('**********************send_message*****************************')
    # # Send a message
    # # await client.send_message('me', 'Hello to myself!')
    # print('*************************messages**************************')
    messages = client.iter_messages(channelURL, limit=num_of_msg_to_save)
    # Collect the records in a list and join once, instead of growing one big string.
    records = []
    async for message in messages:
        msg = str(message.date) + '~~~:::=== [' + str(utils.get_display_name(message.sender)) + ' : ' + str(message.message) + ' ] ===:::=== \n'
        # print(msg)
        records.append(msg)
    # NOTE(review): the file is written with the platform default encoding; whatever reads it
    # back must use the same encoding.
    with open('./telegram/messages3.text', 'w') as file:
        file.write(''.join(records))
    # print('*************************photos**************************')
    # photos = await client.get_messages(channelURL, None, filter=InputMessagesFilterPhotos)
    # total = len(photos)
    # index = 0
    # for photo in photos:
    #     filename = "./telegram/" + str(photo.id) + ".jpg"
    #     index = index + 1
    #     print("downloading:", index, "/", total, " : ", filename)
    #     # Download picture
    #     await client.download_media(photo, filename)
    # print('*************************files**************************')
    # files = await client.get_messages(channelURL, None, filter=InputMessagesFilterDocument)
    # for file in files:
    #     attributes = file.media.document.attributes
    #     # File
    #     if len(attributes) == 1:
    #         fileName = file.media.document.attributes[0].file_name
    #         print(fileName)
    #     # Image Format
    #     if len(attributes) == 2:
    #         fileName = file.media.document.attributes[1].file_name
    #         print(fileName)
    #     # download file
    #     await client.download_media(file, "./telegram/" + fileName)
async with client:
client.loop.run_until_complete(main())
# ## CONVERT TXT FILE TO EXCEL OF SEPARATE MESSAGES
# In[ ]:
###Open the Telegram messages that were previously saved to a text file.
with open("./telegram/messages2.text", "r") as myfile:
    data = myfile.read()
# In[ ]:
###Separate the time and the message text of every saved record using the '~~~:::=== ' separator.
message_lens = []
time_message_list = []
splet = data.split(' ===:::=== ')
for one_splet in splet:
    try:
        this_time = one_splet.split('~~~:::=== ')[0]
        # Assuming the file has the format written by main(): every record except the very
        # first one starts with the newline that ended the previous record. Taking the last
        # line (instead of index 1) keeps the first record too instead of dropping it.
        this_time = this_time.split('\n')[-1]
        # Remove the first and the last character of the part after the separator.
        this_message = one_splet.split('~~~:::=== ')[1][1:-1]
        # Records without this sender prefix raise IndexError and are skipped below.
        this_message = this_message.split('Ratnakar Securities Pvt. Ltd. : ')[1]
        this_message_len = len(this_message.split())
        print('message_len : '+str(this_message_len))
        message_lens.append(this_message_len)
        # Only messages of at most 16 words are kept.
        if(this_message_len <= 16):
            time_message_list.append({'time':this_time,'message':this_message})
        print('===================')
    except IndexError as ieee:
        print(ieee)
#         break
len(message_lens)
# In[ ]:
###Save the result to local storage as the xlsx file 'time_message_list_df2.xlsx'.
import pandas as pd
time_message_list_df = pd.DataFrame(time_message_list)
time_message_list_df.dropna(inplace=True)
time_message_list_df.to_excel('./time_message_list_df2.xlsx',index=False)
time_message_list_df
# ## GET LIST OF SYMBOL FROM LIST OF TEXT
# In[ ]:
###Determine the stock symbol named in each message and add it as a separate column
###called 'symbol' to the pandas dataframe time_message_list_df ('NA' when none is found).
allinst2df = pd.read_csv('../../inst.csv')
# allinst2df.tradingsymbol
symbol_list_selected = []
import string
punc = string.punctuation
# Build the lookup set once instead of converting the column to a list for every word.
known_symbols = set(allinst2df.tradingsymbol.to_list())
# NOTE(review): `tqdm.tqdm` needs `import tqdm`; the import visible elsewhere in this file is
# `from tqdm import tqdm` - confirm which one is in effect when this cell runs.
for one_msg in tqdm.tqdm(time_message_list_df.message):
    # Remove punctuation characters, then split the message into words.
    splets = ''.join([o for o in one_msg if o not in punc]).split()
    # The first word whose upper-case form is a known trading symbol is selected.
    this_symbol = 'NA'
    for one_splet in splets:
        # print(one_splet)
        if one_splet.upper() in known_symbols:
            this_symbol = one_splet.upper()
            break
    symbol_list_selected.append(this_symbol)
len(symbol_list_selected)
time_message_list_df['symbol']=symbol_list_selected
time_message_list_df
###For example the dataframe will look like after executing this code
# time message symbol
# 0 2022-09-08 06:50:40+00:00 FRIENDS..\n\nANYTIME MOMENTUM CAN COME IN DCMN... DCMNVL
# 1 2022-09-08 06:42:16+00:00 KEEP DCMNVL ON RADAR\n\nAND WAIT TILL THERE IS... DCMNVL
# 2 2022-09-08 06:35:28+00:00 DCMNVL... DCM Nouvelle Ltd\n\nHUGE MOMENTUM AB... DCMNVL
# 3 2022-09-08 06:33:50+00:00 SUPER DUPER FUNDAMENTALS SUPER
# 4 2022-09-08 06:32:47+00:00 DELIVERY\n\nBUY DCMNVL CMP 202.70 SL 175 TARGE... DCMNVL
# ... ... ... ...
# 11720 2019-04-04 04:17:21+00:00 INTRADAY : BUY GUJGASLTD ABOVE 153 SL 150 TAR... GUJGASLTD
# 11721 2019-04-04 04:12:08+00:00 DELIVERY : BUY BOMDYEING CMP 144.85 SL 134 TAR... BOMDYEING
# 11722 2019-04-03 09:58:55+00:00 BTST : MMTC CMP 29.25 MMTC
# 11723 2019-04-03 09:55:59+00:00 BTST : EMKAY CMP 110 EMKAY
# In[ ]:
# In[ ]:
# # Convert Label-Studio CSV to Huggingface autoTrain Token Classification Training File
# In[ ]:
###Here we are importing the necessary libraries for this project
import pandas as pd
import ast
from tqdm import tqdm
import math
import re
import numpy as np
import ipywidgets as widgets
from rich import print
from functools import partial
from IPython.display import clear_output
import joblib
import telegramAuth
# ##### Load Local saved Label-studio CSV export file
# In[ ]:
###Load the CSV file exported by Label Studio after labelling the dataset for the NER / token classification application.
csv_import = pd.read_csv('./project-1-at-2022-10-06-05-35-9f232119.csv')
# csv_import
# Notebook cell output: show the first rows of the loaded export.
csv_import.head()
``` explain this code of our project to me. give me summary of it.</div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"></path><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group bg-gray-50 dark:bg-[#444654]"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative h-[30px] w-[30px] p-1 rounded-sm text-white flex items-center justify-center" style="background-color: rgb(16, 163, 127);"><svg width="41" height="41" viewBox="0 0 41 41" fill="none" xmlns="http://www.w3.org/2000/svg" stroke-width="1.5" class="h-6 w-6"><path d="M37.5324 16.8707C37.9808 15.5241 38.1363 14.0974 37.9886 12.6859C37.8409 11.2744 37.3934 9.91076 36.676 8.68622C35.6126 6.83404 33.9882 5.3676 32.0373 4.4985C30.0864 3.62941 27.9098 3.40259 25.8215 3.85078C24.8796 2.7893 23.7219 1.94125 22.4257 1.36341C21.1295 0.785575 19.7249 0.491269 18.3058 0.500197C16.1708 0.495044 14.0893 1.16803 12.3614 2.42214C10.6335 3.67624 9.34853 5.44666 8.6917 7.47815C7.30085 7.76286 5.98686 8.3414 4.8377 9.17505C3.68854 10.0087 2.73073 11.0782 2.02839 12.312C0.956464 14.1591 0.498905 16.2988 0.721698 18.4228C0.944492 20.5467 1.83612 22.5449 
3.268 24.1293C2.81966 25.4759 2.66413 26.9026 2.81182 28.3141C2.95951 29.7256 3.40701 31.0892 4.12437 32.3138C5.18791 34.1659 6.8123 35.6322 8.76321 36.5013C10.7141 37.3704 12.8907 37.5973 14.9789 37.1492C15.9208 38.2107 17.0786 39.0587 18.3747 39.6366C19.6709 40.2144 21.0755 40.5087 22.4946 40.4998C24.6307 40.5054 26.7133 39.8321 28.4418 38.5772C30.1704 37.3223 31.4556 35.5506 32.1119 33.5179C33.5027 33.2332 34.8167 32.6547 35.9659 31.821C37.115 30.9874 38.0728 29.9178 38.7752 28.684C39.8458 26.8371 40.3023 24.6979 40.0789 22.5748C39.8556 20.4517 38.9639 18.4544 37.5324 16.8707ZM22.4978 37.8849C20.7443 37.8874 19.0459 37.2733 17.6994 36.1501C17.7601 36.117 17.8666 36.0586 17.936 36.0161L25.9004 31.4156C26.1003 31.3019 26.2663 31.137 26.3813 30.9378C26.4964 30.7386 26.5563 30.5124 26.5549 30.2825V19.0542L29.9213 20.998C29.9389 21.0068 29.9541 21.0198 29.9656 21.0359C29.977 21.052 29.9842 21.0707 29.9867 21.0902V30.3889C29.9842 32.375 29.1946 34.2791 27.7909 35.6841C26.3872 37.0892 24.4838 37.8806 22.4978 37.8849ZM6.39227 31.0064C5.51397 29.4888 5.19742 27.7107 5.49804 25.9832C5.55718 26.0187 5.66048 26.0818 5.73461 26.1244L13.699 30.7248C13.8975 30.8408 14.1233 30.902 14.3532 30.902C14.583 30.902 14.8088 30.8408 15.0073 30.7248L24.731 25.1103V28.9979C24.7321 29.0177 24.7283 29.0376 24.7199 29.0556C24.7115 29.0736 24.6988 29.0893 24.6829 29.1012L16.6317 33.7497C14.9096 34.7416 12.8643 35.0097 10.9447 34.4954C9.02506 33.9811 7.38785 32.7263 6.39227 31.0064ZM4.29707 13.6194C5.17156 12.0998 6.55279 10.9364 8.19885 10.3327C8.19885 10.4013 8.19491 10.5228 8.19491 10.6071V19.808C8.19351 20.0378 8.25334 20.2638 8.36823 20.4629C8.48312 20.6619 8.64893 20.8267 8.84863 20.9404L18.5723 26.5542L15.206 28.4979C15.1894 28.5089 15.1703 28.5155 15.1505 28.5173C15.1307 28.5191 15.1107 28.516 15.0924 28.5082L7.04046 23.8557C5.32135 22.8601 4.06716 21.2235 3.55289 19.3046C3.03862 17.3858 3.30624 15.3413 4.29707 13.6194ZM31.955 20.0556L22.2312 14.4411L25.5976 12.4981C25.6142 12.4872 
25.6333 12.4805 25.6531 12.4787C25.6729 12.4769 25.6928 12.4801 25.7111 12.4879L33.7631 17.1364C34.9967 17.849 36.0017 18.8982 36.6606 20.1613C37.3194 21.4244 37.6047 22.849 37.4832 24.2684C37.3617 25.6878 36.8382 27.0432 35.9743 28.1759C35.1103 29.3086 33.9415 30.1717 32.6047 30.6641C32.6047 30.5947 32.6047 30.4733 32.6047 30.3889V21.188C32.6066 20.9586 32.5474 20.7328 32.4332 20.5338C32.319 20.3348 32.154 20.1698 31.955 20.0556ZM35.3055 15.0128C35.2464 14.9765 35.1431 14.9142 35.069 14.8717L27.1045 10.2712C26.906 10.1554 26.6803 10.0943 26.4504 10.0943C26.2206 10.0943 25.9948 10.1554 25.7963 10.2712L16.0726 15.8858V11.9982C16.0715 11.9783 16.0753 11.9585 16.0837 11.9405C16.0921 11.9225 16.1048 11.9068 16.1207 11.8949L24.1719 7.25025C25.4053 6.53903 26.8158 6.19376 28.2383 6.25482C29.6608 6.31589 31.0364 6.78077 32.2044 7.59508C33.3723 8.40939 34.2842 9.53945 34.8334 10.8531C35.3826 12.1667 35.5464 13.6095 35.3055 15.0128ZM14.2424 21.9419L10.8752 19.9981C10.8576 19.9893 10.8423 19.9763 10.8309 19.9602C10.8195 19.9441 10.8122 19.9254 10.8098 19.9058V10.6071C10.8107 9.18295 11.2173 7.78848 11.9819 6.58696C12.7466 5.38544 13.8377 4.42659 15.1275 3.82264C16.4173 3.21869 17.8524 2.99464 19.2649 3.1767C20.6775 3.35876 22.0089 3.93941 23.1034 4.85067C23.0427 4.88379 22.937 4.94215 22.8668 4.98473L14.9024 9.58517C14.7025 9.69878 14.5366 9.86356 14.4215 10.0626C14.3065 10.2616 14.2466 10.4877 14.2479 10.7175L14.2424 21.9419ZM16.071 17.9991L20.4018 15.4978L24.7325 17.9975V22.9985L20.4018 25.4983L16.071 22.9985V17.9991Z" fill="currentColor"></path></svg></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap"><div class="markdown prose w-full break-words dark:prose-invert dark"><p>This script uses the Telethon library to download historical messages from a Telegram channel. 
It starts by importing various libraries, including Telethon, CSV, time, re, and nest_asyncio. The nest_asyncio library is used to run asyncio code within an existing event loop.</p><p>The script then defines several variables: <code>api_id</code>, <code>api_hash</code>, and <code>phone</code>, which are used to authenticate the client with the Telegram API. It then creates a TelegramClient object using these variables, and uses the <code>connect()</code> method to connect to Telegram's servers. If the client is not authorized, it will prompt the user to enter a code that is sent to the phone number provided, and then use the <code>sign_in()</code> method to authorize the client.</p><p>Next, the script uses the <code>JoinChannelRequest</code> function to join the specified Telegram channel, by passing the channel URL. After the channel is joined, the script defines a function <code>main()</code> that uses the <code>iter_messages()</code> method of the TelegramClient object to download a specified number of messages from the channel. These messages are saved to a text file in a specific format, so that they can be further processed. 
The script also includes commented out code that shows how to download photos and files from the channel, but this code is not currently being executed.</p></div></div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M14 9V5a3 3 0 0 0-3-3l-4 9v11h11.28a2 2 0 0 0 2-1.7l1.38-9a2 2 0 0 0-2-2.3zM7 22H4a2 2 0 0 1-2-2v-7a2 2 0 0 1 2-2h3"></path></svg></button><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M10 15v4a3 3 0 0 0 3 3l4-9V2H5.72a2 2 0 0 0-2 1.7l-1.38 9a2 2 0 0 0 2 2.3zm7-13h2.67A2.31 2.31 0 0 1 22 4v7a2.31 2.31 0 0 1-2.33 2H17"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group dark:bg-gray-800"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative flex"><span style="box-sizing: border-box; display: inline-block; overflow: hidden; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; position: relative; max-width: 100%;"><span style="box-sizing: border-box; display: 
block; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; max-width: 100%;"><img alt="" aria-hidden="true" src="data:image/svg+xml,%3csvg%20xmlns=%27http://www.w3.org/2000/svg%27%20version=%271.1%27%20width=%2730%27%20height=%2730%27/%3e" style="display: block; max-width: 100%; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px;"></span><img alt="Hemang Joshi" srcset="/_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=32&q=75 1x, /_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=64&q=75 2x" src="./telegram_message_download Script_files/AEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA=s96-c.jpeg" decoding="async" data-nimg="intrinsic" class="rounded-sm" style="position: absolute; inset: 0px; box-sizing: border-box; padding: 0px; border: none; margin: auto; display: block; width: 0px; height: 0px; min-width: 100%; max-width: 100%; min-height: 100%; max-height: 100%;"></span></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap">```
# convert_label_studio_to_hngFace_autoTrain_dataset(csv_import)
# In[ ]:
def NEW_LOGIC_convert_label_studio_to_hngFace_autoTrain_dataset(csv_import) -> list:
    '''Converts export file from Label-Studio to HuggingFace autoTrain trainable dataset

    Every row of ``csv_import`` needs a ``text`` column and a ``label`` column. ``label`` is the
    string form of a list of span dicts with the keys ``start``, ``end`` and ``labels`` (only
    ``labels[0]`` is used). The text is cut into consecutive segments: annotated spans keep
    their label, every stretch of text before, between or after them is tagged 'NANA'.
    Spans are processed in order of their ``start`` offset, whatever order they are stored in.

    Special rows:
    - a ``label`` that is NaN yields ``{'text': '', 'label': 'NANA'}``
    - an empty span list yields empty ``text`` and ``label`` lists

    Example :
    ***** INPUT of this function **** (file : ../project-1-at-2022-09-13-06-14-0f0c0db3.csv))
    text    id    annotator    annotation_id    created_at    updated_at    lead_time    label
    KEEP JAGSNPHARM ON RADAR...    1001    1    1007    2022-09-13T05:51:47.578986Z    2022-09-13T05:51:47.579052Z    4.298    [{"start": 7, "end": 17, "text": "JAGSNPHARM", "labels": ["symbol"]}]
    INTRADAY : BUY JAGSNPHARM ABOVE 306 SL 302 TARGET 312 - 316 - 320    1000    1    1006    2022-09-13T05:51:41.795524Z    2022-09-13T05:51:41.795587Z    6.055    [{"start": 15, "end": 25, "text": "JAGSNPHARM", "labels": ["symbol"]}, {"start": 32, "end": 35, "text": "306", "labels": ["enter"]}, {"start": 39, "end": 42, "text": "302", "labels": ["sl"]}, {"start": 50, "end": 53, "text": "312", "labels": ["exit"]}]
    SUPER DUPER FUNDAMENTALS IN JAGSNPHARM...    999    1    1005    2022-09-13T05:51:34.283369Z    2022-09-13T05:51:34.283431Z    2.903    [{"start": 28, "end": 38, "text": "JAGSNPHARM", "labels": ["symbol"]}]
    ***** OUTPUT of this function **** (file : ./dataset_for_huggingface_autoTrain_label_vlassification.csv)
    text    label
    ['KEEP ', 'JAGSNPHARM', ' ON RADAR... ']    ['NANA', 'symbol', 'NANA']
    ['INTRADAY : BUY ', 'JAGSNPHARM', ' ABOVE ', '306', ' SL ', '302', ' TARGET ', '312', ' - 316 - 320 ']    ['NANA', 'symbol', 'NANA', 'enter', 'NANA', 'sl', 'NANA', 'exit', 'NANA']
    ['SUPER DUPER FUNDAMENTALS IN ', 'JAGSNPHARM', '... ']    ['NANA', 'symbol', 'NANA']

    Returns:
        list with one dict ``{'text': ..., 'label': ...}`` per row of ``csv_import``.
    '''
    final_dataset = []
    for indx in csv_import.index:
        this_label = csv_import.label[indx]
        # A label that is not a string is expected to be NaN, i.e. a row without annotation.
        if not isinstance(this_label, str):
            if np.isnan(this_label):
                final_dataset.append({'text': '', 'label': 'NANA'})
                continue
        # Walk the spans in reading order so that the gaps between neighbours are correct
        # even when the export does not list them sorted by position.
        annotations = sorted(ast.literal_eval(this_label), key=lambda span: span['start'])
        this_text = csv_import.text[indx]
        final_label_text = []
        final_label_annotation = []
        # Offset up to which the text has already been emitted.
        cursor = 0
        for span in annotations:
            span_start = span['start']
            span_end = span['end']
            # Unlabelled text before this span (leading text or a gap after the previous span).
            if span_start > cursor:
                final_label_text.append(this_text[cursor:span_start])
                final_label_annotation.append('NANA')
            final_label_text.append(this_text[span_start:span_end])
            final_label_annotation.append(span['labels'][0])
            # max() keeps the cursor from moving backwards on nested / overlapping spans.
            cursor = max(cursor, span_end)
        # Unlabelled text after the last span; rows without any span stay empty.
        if annotations and len(this_text) > cursor:
            final_label_text.append(this_text[cursor:])
            final_label_annotation.append('NANA')
        final_dataset.append({'text': final_label_text, 'label': final_label_annotation})
    return final_dataset
# In[ ]:
# this_dataset_df = pd.DataFrame(NEW_LOGIC_convert_label_studio_to_hngFace_autoTrain_dataset(csv_import))
# this_dataset_df.to_csv('./dataset_for_huggingface_autoTrain_label_vlassification_NEW_LOGIC.csv',index=False)
# this_dataset_df
# In[ ]:
# 192.168.227.62 - - [19/Sep/2022 18:09:36] "GET /api/jobs/65320ac73a4847a2b5a7f8ddbd45b821 HTTP/1.1" 200 -
# 2022-09-19 18:09:37 sparsify.blueprints.jobs INFO getting job 65320ac73a4847a2b5a7f8ddbd45b821
# 2022-09-19 18:09:37 sparsify.blueprints.jobs INFO retrieved job {'job': {'created': '2022-09-19T18:09:22.636614', 'worker_args': {'model_id': '5f276c2809654a24adb8d4ed4c3da3d7', 'profile_id': '4cbcefb9e2fe4a3fa7903604f72ea5a3', 'batch_size': 10, 'core_count': 8, 'pruning_estimations': True, 'quantized_estimations': False, 'iterations_per_check': 10, 'warmup_iterations_per_check': 5}, 'modified': '2022-09-19T18:09:36.205104', 'type_': 'CreatePerfProfileJobWorker', 'error': None, 'project_id': 'e1852305571c4f138149d12312acc30d', 'job_id': '65320ac73a4847a2b5a7f8ddbd45b821', 'progress': None, 'status': 'started'}}
# 192.168.227.62 - - [19/Sep/2022 18:09:37] "GET /api/jobs/65320ac73a4847a2b5a7f8ddbd45b821 HTTP/1.1" 200 -
# 2022-09-19 18:09:37 sparsify.workers.manager WARNING Job 65320ac73a4847a2b5a7f8ddbd45b821 errored out Invalid input shape, cannot create a random input shape from: (None,)
# 2022-09-19 18:09:37 sparsify.blueprints.jobs INFO getting job 65320ac73a4847a2b5a7f8ddbd45b821
# 2022-09-19 18:09:37 sparsify.blueprints.jobs INFO retrieved job {'job': {'created': '2022-09-19T18:09:22.636614', 'worker_args': {'model_id': '5f276c2809654a24adb8d4ed4c3da3d7', 'profile_id': '4cbcefb9e2fe4a3fa7903604f72ea5a3', 'batch_size': 10, 'core_count': 8, 'pruning_estimations': True, 'quantized_estimations': False, 'iterations_per_check': 10, 'warmup_iterations_per_check': 5}, 'modified': '2022-09-19T18:09:37.427305', 'type_': 'CreatePerfProfileJobWorker', 'error': 'Invalid input shape, cannot create a random input shape from: (None,)', 'project_id': 'e1852305571c4f138149d12312acc30d', 'job_id': '65320ac73a4847a2b5a7f8ddbd45b821', 'progress': None, 'status': 'canceled'}}
# 192.168.227.62 - - [19/Sep/2022 18:09:37] "GET /api/jobs/65320ac73a4847a2b5a7f8ddbd45b821 HTTP/1.1" 200 -
# # Define Predict Function Using Trained Model From AutoTrain Huggingface Returns Dictionary With Data
# In[ ]:
# pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
# In[ ]:
###### LOAD PRETRAINED MODEL FROM HUGGINGFACE autoTrain #################
# Loads the token-classification model and its tokenizer from the same Hugging Face repository id.
# NOTE(review): use_auth_token=True presumably relies on a locally stored Hugging Face login
# token - confirm before running this on a new machine.
from transformers import AutoModelForTokenClassification, AutoTokenizer
model = AutoModelForTokenClassification.from_pretrained("hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313", use_auth_token=True)
tokenizer = AutoTokenizer.from_pretrained("hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313", use_auth_token=True)
# In[ ]:
# #### EXPORT HUGGINFFACE MODEL TO ONNX TYPE MODEL FOR SPARSE LEARNING #####
# base_model=model
# from transformers.models.distilbert import DistilBertConfig, DistilBertOnnxConfig
# DistilBertOnnxConfig(base_model.config)
# from pathlib import Path
# from transformers.onnx import export
# from transformers import AutoTokenizer, AutoModel
# onnx_path = Path("./autotrain-ratnakar_1000_sample_curated-1474454086")
# # model_ckpt = "distilbert-base-uncased"
# # base_model = AutoModel.from_pretrained(model_ckpt)
# # tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# onnx_inputs, onnx_outputs = export(tokenizer, base_model, DistilBertOnnxConfig(base_model.config), DistilBertOnnxConfig(base_model.config).default_onnx_opset, onnx_path)
# from transformers import AutoConfig
# config = AutoConfig.from_pretrained('hemangjoshi37a/autotrain-ratnakar_1000_sample_curated-1474454086')
# tokenizer.save_pretrained('./model_files/')
# config.save_pretrained('./model_files/')
# In[ ]:
class ClassifiedMsgClass:
    """Classify the words of one trading message with the token-classification model.

    Constructing an instance runs the module-level ``tokenizer`` and ``model`` on
    ``msg``, merges sub-word tokens back into words and stores:

    * ``predictions_df``   - one row per merged word with its class id and name
    * ``predictions_dict`` - ``{class_name: word}`` for every labelled word
    * one attribute per labelled class (``symbol``, ``enter``, ``sl`` ...),
      overriding the defaults set in ``__init__``
    """

    # Model class id -> label name; id 0 maps to '' and is treated as "unlabelled".
    classification_number_to_name_dict = {0 : '',
                                          1 : 'btst',
                                          2 : 'delivery',
                                          3 : 'enter',
                                          4 : 'momentum',
                                          5 : 'exit',
                                          6 : 'exit2',
                                          7 : 'exit3',
                                          8 : 'intraday',
                                          9 : 'sl',
                                          10 : 'symbol',
                                          11 : 'touched'}
    # Inverse lookup, label name -> class id.
    classification_name_to_number_dict = {
        name: number for number, name in classification_number_to_name_dict.items()
    }

    def __init__(self,msg):
        """Store ``msg``, set per-class defaults, then classify the message."""
        self.msg = msg
        self.btst = ''
        self.delivery = ''
        self.enter = 0
        self.momentum =0
        self.exit = 0
        self.exit2 = 0
        self.exit3 = 0
        self.intraday = ''
        self.sl = 0
        self.symbol = ''
        self.touched = 0
        self.get_class_map_from_message_NEW(self.msg)

    def dict_to_self_attr(self, *initial_data, **kwargs):
        """Copy every key/value of the given dicts and keyword args onto ``self``."""
        for dictionary in initial_data:
            for key in dictionary:
                setattr(self, key, dictionary[key])
        for key in kwargs:
            setattr(self, key, kwargs[key])

    def get_class_map_from_message_NEW(self,input_message:str) -> dict:
        """Predict a class for every word of ``input_message``.

        Side effects: sets ``self.predictions_df``, ``self.predictions_dict`` and
        one attribute per labelled class.

        Returns:
            ``{class_name: word}`` for the labelled words. When a class occurs
            more than once the last word wins (plain ``dict(zip(...))``).
        """
        ########### PREDICT TEXT AND CLASSIFY WORDS ##########
        encoded = tokenizer(input_message,return_tensors='pt')
        output = model(**encoded)
        decoded_tokens = tokenizer.batch_decode(encoded['input_ids'][0])

        sentence = []
        sentence_class = []
        sentence_class_name = []

        def emit(word, class_number):
            # Skip the empty placeholder that exists before the first real token.
            if word:
                sentence.append(word)
                sentence_class.append(class_number)
                sentence_class_name.append(self.classification_number_to_name_dict[class_number])

        current_word = ''
        last_class_number = 0
        last_decoded_word = ''
        for position, this_decoded_word in enumerate(decoded_tokens):
            if this_decoded_word in ('[CLS]', '[SEP]'):
                continue
            this_class_number = int(np.argmax(output.logits[0][position].tolist()))
            if this_decoded_word.startswith('##'):
                # Continuation piece: glue it to the word being built. Only the
                # leading '##' marker is stripped, so a literal '#' token is no
                # longer mistaken for a continuation and silently dropped.
                current_word += this_decoded_word[2:]
            elif this_class_number == last_class_number and '.' in (this_decoded_word, last_decoded_word):
                # Re-join decimals such as '108', '.', '70' while the class is unchanged.
                current_word += this_decoded_word
            else:
                # A new word starts: flush the finished one under its first token's class.
                emit(current_word, last_class_number)
                current_word = this_decoded_word
                last_class_number = this_class_number
            # Tracked for every token so the piece after a '.' can be joined above.
            last_decoded_word = this_decoded_word
        emit(current_word, last_class_number)

        self.predictions_df = pd.DataFrame({'sentence':sentence,
                                            'sentence_class':sentence_class,
                                            'sentence_class_name':sentence_class_name,
                                            })
        # Turn numeric-looking words into numbers and keep every other cell as it was.
        self.predictions_df = self.predictions_df.apply(pd.to_numeric, errors='coerce').fillna(self.predictions_df)
        labelled_df = self.predictions_df.loc[self.predictions_df.sentence_class_name!='']
        keys = labelled_df.sentence_class_name
        values = labelled_df.sentence
        self.predictions_dict = dict(zip(keys, values))
        self.dict_to_self_attr(self.predictions_dict)
        return self.predictions_dict
# class_number_to_name_dict
# In[ ]:
# Smoke-test the classifier on one sample message; the bare name on the last
# line is the notebook-cell expression that shows the resulting instance.
this_classified_msg = ClassifiedMsgClass('''SUPER DUPER DELIVERY BUY
DELIVERY : BUY MAHEPC CMP 108.70 SL 95 TARGET 140 - 165 - 180 - 198+''')
this_classified_msg
# In[ ]:
# messagesdf = pd.read_excel('./time_message_list_df2.xlsx')
# del messagesdf['symbol']
# messagesdf['something'] = ''
# messagesdf['prediction'] = ''
# messagesdf['isPredAccurate'] = ''
# messagesdf['symbol'] = ''
# messagesdf['enter'] = ''
# messagesdf['sl'] = ''
# messagesdf['exit'] = ''
# messagesdf['insttkn'] = ''
# messagesdf
# In[ ]:
``` explain this next part of this project to me.</div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"></path><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group bg-gray-50 dark:bg-[#444654]"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative h-[30px] w-[30px] p-1 rounded-sm text-white flex items-center justify-center" style="background-color: rgb(16, 163, 127);"><svg width="41" height="41" viewBox="0 0 41 41" fill="none" xmlns="http://www.w3.org/2000/svg" stroke-width="1.5" class="h-6 w-6"><path d="M37.5324 16.8707C37.9808 15.5241 38.1363 14.0974 37.9886 12.6859C37.8409 11.2744 37.3934 9.91076 36.676 8.68622C35.6126 6.83404 33.9882 5.3676 32.0373 4.4985C30.0864 3.62941 27.9098 3.40259 25.8215 3.85078C24.8796 2.7893 23.7219 1.94125 22.4257 1.36341C21.1295 0.785575 19.7249 0.491269 18.3058 0.500197C16.1708 0.495044 14.0893 1.16803 12.3614 2.42214C10.6335 3.67624 9.34853 5.44666 8.6917 7.47815C7.30085 7.76286 5.98686 8.3414 4.8377 9.17505C3.68854 10.0087 2.73073 11.0782 2.02839 12.312C0.956464 14.1591 0.498905 16.2988 0.721698 18.4228C0.944492 20.5467 1.83612 22.5449 3.268 
24.1293C2.81966 25.4759 2.66413 26.9026 2.81182 28.3141C2.95951 29.7256 3.40701 31.0892 4.12437 32.3138C5.18791 34.1659 6.8123 35.6322 8.76321 36.5013C10.7141 37.3704 12.8907 37.5973 14.9789 37.1492C15.9208 38.2107 17.0786 39.0587 18.3747 39.6366C19.6709 40.2144 21.0755 40.5087 22.4946 40.4998C24.6307 40.5054 26.7133 39.8321 28.4418 38.5772C30.1704 37.3223 31.4556 35.5506 32.1119 33.5179C33.5027 33.2332 34.8167 32.6547 35.9659 31.821C37.115 30.9874 38.0728 29.9178 38.7752 28.684C39.8458 26.8371 40.3023 24.6979 40.0789 22.5748C39.8556 20.4517 38.9639 18.4544 37.5324 16.8707ZM22.4978 37.8849C20.7443 37.8874 19.0459 37.2733 17.6994 36.1501C17.7601 36.117 17.8666 36.0586 17.936 36.0161L25.9004 31.4156C26.1003 31.3019 26.2663 31.137 26.3813 30.9378C26.4964 30.7386 26.5563 30.5124 26.5549 30.2825V19.0542L29.9213 20.998C29.9389 21.0068 29.9541 21.0198 29.9656 21.0359C29.977 21.052 29.9842 21.0707 29.9867 21.0902V30.3889C29.9842 32.375 29.1946 34.2791 27.7909 35.6841C26.3872 37.0892 24.4838 37.8806 22.4978 37.8849ZM6.39227 31.0064C5.51397 29.4888 5.19742 27.7107 5.49804 25.9832C5.55718 26.0187 5.66048 26.0818 5.73461 26.1244L13.699 30.7248C13.8975 30.8408 14.1233 30.902 14.3532 30.902C14.583 30.902 14.8088 30.8408 15.0073 30.7248L24.731 25.1103V28.9979C24.7321 29.0177 24.7283 29.0376 24.7199 29.0556C24.7115 29.0736 24.6988 29.0893 24.6829 29.1012L16.6317 33.7497C14.9096 34.7416 12.8643 35.0097 10.9447 34.4954C9.02506 33.9811 7.38785 32.7263 6.39227 31.0064ZM4.29707 13.6194C5.17156 12.0998 6.55279 10.9364 8.19885 10.3327C8.19885 10.4013 8.19491 10.5228 8.19491 10.6071V19.808C8.19351 20.0378 8.25334 20.2638 8.36823 20.4629C8.48312 20.6619 8.64893 20.8267 8.84863 20.9404L18.5723 26.5542L15.206 28.4979C15.1894 28.5089 15.1703 28.5155 15.1505 28.5173C15.1307 28.5191 15.1107 28.516 15.0924 28.5082L7.04046 23.8557C5.32135 22.8601 4.06716 21.2235 3.55289 19.3046C3.03862 17.3858 3.30624 15.3413 4.29707 13.6194ZM31.955 20.0556L22.2312 14.4411L25.5976 12.4981C25.6142 12.4872 25.6333 
12.4805 25.6531 12.4787C25.6729 12.4769 25.6928 12.4801 25.7111 12.4879L33.7631 17.1364C34.9967 17.849 36.0017 18.8982 36.6606 20.1613C37.3194 21.4244 37.6047 22.849 37.4832 24.2684C37.3617 25.6878 36.8382 27.0432 35.9743 28.1759C35.1103 29.3086 33.9415 30.1717 32.6047 30.6641C32.6047 30.5947 32.6047 30.4733 32.6047 30.3889V21.188C32.6066 20.9586 32.5474 20.7328 32.4332 20.5338C32.319 20.3348 32.154 20.1698 31.955 20.0556ZM35.3055 15.0128C35.2464 14.9765 35.1431 14.9142 35.069 14.8717L27.1045 10.2712C26.906 10.1554 26.6803 10.0943 26.4504 10.0943C26.2206 10.0943 25.9948 10.1554 25.7963 10.2712L16.0726 15.8858V11.9982C16.0715 11.9783 16.0753 11.9585 16.0837 11.9405C16.0921 11.9225 16.1048 11.9068 16.1207 11.8949L24.1719 7.25025C25.4053 6.53903 26.8158 6.19376 28.2383 6.25482C29.6608 6.31589 31.0364 6.78077 32.2044 7.59508C33.3723 8.40939 34.2842 9.53945 34.8334 10.8531C35.3826 12.1667 35.5464 13.6095 35.3055 15.0128ZM14.2424 21.9419L10.8752 19.9981C10.8576 19.9893 10.8423 19.9763 10.8309 19.9602C10.8195 19.9441 10.8122 19.9254 10.8098 19.9058V10.6071C10.8107 9.18295 11.2173 7.78848 11.9819 6.58696C12.7466 5.38544 13.8377 4.42659 15.1275 3.82264C16.4173 3.21869 17.8524 2.99464 19.2649 3.1767C20.6775 3.35876 22.0089 3.93941 23.1034 4.85067C23.0427 4.88379 22.937 4.94215 22.8668 4.98473L14.9024 9.58517C14.7025 9.69878 14.5366 9.86356 14.4215 10.0626C14.3065 10.2616 14.2466 10.4877 14.2479 10.7175L14.2424 21.9419ZM16.071 17.9991L20.4018 15.4978L24.7325 17.9975V22.9985L20.4018 25.4983L16.071 22.9985V17.9991Z" fill="currentColor"></path></svg></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap"><div class="markdown prose w-full break-words dark:prose-invert dark"><p>This script defines a function <code>NEW_LOGIC_convert_label_studio_to_hngFace_autoTrain_dataset(csv_import)</code> that converts an export file 
from Label-Studio to a format that is compatible with the HuggingFace autoTrain library. This function expects a single argument, <code>csv_import</code>, which is a CSV file containing a dataset labeled using Label-Studio.</p><p>The function begins by initializing an empty list called <code>final_dataset</code>, which will later be populated with the processed data. It then loops through the rows of the input CSV file, extracting the <code>text</code> and <code>label</code> columns of each row. The <code>label</code> column contains a list of labeled regions in the text, represented as a string of JSON-formatted data.</p><p>The function then uses the <code>literal_eval()</code> function from the ast library to convert this string of JSON data into a Python list. It then extracts the starting position of each labeled region and stores it in a list <code>start_pos_of_label__list</code>. This list is sorted by starting position so that the labeled regions can be identified in their original order in the text.</p><p>Next, the function loops through the text of each row, using the starting positions of the labeled regions to split the text into a list of substrings. Each substring is then tagged with the corresponding label from the <code>label</code> column, and the resulting list of substrings and labels is added to the <code>final_dataset</code> list.</p><p>The final dataset is returned in format of List of Dictionaries with text and label and in format ready to be used for training with HuggingFace autoTrain library. Each row corresponds to a substring of the original text and its corresponding label. 
'NANA' is used as a placeholder label for any un-labeled regions of the text.</p></div></div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M14 9V5a3 3 0 0 0-3-3l-4 9v11h11.28a2 2 0 0 0 2-1.7l1.38-9a2 2 0 0 0-2-2.3zM7 22H4a2 2 0 0 1-2-2v-7a2 2 0 0 1 2-2h3"></path></svg></button><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M10 15v4a3 3 0 0 0 3 3l4-9V2H5.72a2 2 0 0 0-2 1.7l-1.38 9a2 2 0 0 0 2 2.3zm7-13h2.67A2.31 2.31 0 0 1 22 4v7a2.31 2.31 0 0 1-2.33 2H17"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group dark:bg-gray-800"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative flex"><span style="box-sizing: border-box; display: inline-block; overflow: hidden; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; position: relative; max-width: 100%;"><span style="box-sizing: border-box; display: block; width: initial; height: initial; background: none; opacity: 1; 
border: 0px; margin: 0px; padding: 0px; max-width: 100%;"><img alt="" aria-hidden="true" src="data:image/svg+xml,%3csvg%20xmlns=%27http://www.w3.org/2000/svg%27%20version=%271.1%27%20width=%2730%27%20height=%2730%27/%3e" style="display: block; max-width: 100%; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px;"></span><img alt="Hemang Joshi" srcset="/_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=32&q=75 1x, /_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=64&q=75 2x" src="./telegram_message_download Script_files/AEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA=s96-c.jpeg" decoding="async" data-nimg="intrinsic" class="rounded-sm" style="position: absolute; inset: 0px; box-sizing: border-box; padding: 0px; border: none; margin: auto; display: block; width: 0px; height: 0px; min-width: 100%; max-width: 100%; min-height: 100%; max-height: 100%;"></span></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap">```
# # Convert messagesdf message to predictions in 'prediction' column in messagesdf
# In[ ]:
# for index,data in tqdm(messagesdf.iterrows()):
# this_classified_msg = ClassifiedMsgClass(data['message'])
# messagesdf.at[index, 'prediction'] = this_classified_msg.predictions_dict
# In[ ]:
# In[ ]:
# # Interactively Curate True and Falsified Predictions and store to joblib file
# In[ ]:
##### Load messagedf using joblib #######
# messagesdf = joblib.load('./messagesdf.p')
# The joblib variant above is disabled; the frame is read from CSV instead, with
# the 'time' column parsed as datetimes.
messagesdf = pd.read_csv('./messagesdf.csv',parse_dates=['time'])
# Drop the timezone so 'time' holds naive timestamps.
messagesdf['time'] = messagesdf['time'].dt.tz_localize(None)
# joblib.dump(messagesdf,'./curated_predictions.p')
# Resume curation at the sample index stored in sample_number.csv, the same file
# display_annotations() rewrites on every call.
sample_number = pd.read_csv('./sample_number.csv')['sample_number'][0]
# joblib.dump(pd.DataFrame({'sample_number':sample_number},index=[0]),'./sample_number.p')
messagesdf
# In[ ]:
#### Sample Number Selector Carousal Widget ###
def carousel_box_color(i,sample_number) -> str:
    """Return the button style for carousel entry ``i``.

    The entry matching the current ``sample_number`` gets 'danger'; every other
    entry gets 'warning'.
    """
    return 'danger' if i == sample_number else 'warning'
def carousel_on_sample_number_click(widget_data):
    """Jump to the sample whose number is the clicked carousel button's label.

    Args:
        widget_data: the clicked button; its ``description`` holds the sample number.
    """
    global sample_number
    clear_output(wait=True)
    sample_number = int(widget_data.description)
    display_annotations()
    display_common_widget()
    # NOTE(review): the index is advanced AFTER the clicked sample was rendered, so
    # the next True/False click labels row N+1 while row N is on screen. Kept as in
    # the original - confirm whether this increment is intended.
    sample_number+=1
def carousal_number_range(sample_number) -> range:
    """Return the sample numbers to show as carousel buttons.

    Above sample 40 the window is ``sample_number-20 .. sample_number+19``;
    otherwise it is the fixed window ``0 .. 49``.
    """
    if sample_number > 40:
        return range(sample_number-20, sample_number+20)
    return range(50)
def display_sample_number_selector_carousal(sample_number):
    """Render a horizontally scrolling row of buttons, one per selectable sample.

    The button for ``sample_number`` is highlighted; clicking any button calls
    ``carousel_on_sample_number_click`` with that button.
    """
    item_layout = widgets.Layout(height='25px', min_width='60px')
    items = [
        widgets.Button(layout=item_layout,
                       description=str(i),
                       button_style=carousel_box_color(i,sample_number))
        for i in carousal_number_range(sample_number)
    ]
    box_layout = widgets.Layout(overflow='scroll hidden',
                                border='3px solid black',
                                width='700px',
                                height='',
                                flex_flow='row',
                                display='flex')
    sample_number_selector_carousel = widgets.Box(children=items, layout=box_layout,)
    for each_item in items:
        # The handler is registered directly; wrapping it in partial() with no
        # bound arguments added nothing.
        each_item.on_click(carousel_on_sample_number_click)
    display(sample_number_selector_carousel)
# display_sample_number_selector_carousal(sample_number)
# In[ ]:
####### Fx for displaying widget and predicted annotations #########
def display_annotations():
    """Classify the current sample, persist the result and show the predictions.

    Reads the global ``sample_number``; writes the prediction dict into
    ``messagesdf``, rewrites ``./messagesdf.csv`` and ``./sample_number.csv``,
    then displays the per-word prediction table.
    """
    global messagesdf
    this_message = messagesdf.message[sample_number]
    print(this_message)
    # this_predictions=list_of_predictions[sample_number]
    this_classified_msg_instance = ClassifiedMsgClass(this_message)
    this_predictions = this_classified_msg_instance.predictions_df
    ## inserting value in cell [index(messagesdf_index),prediction(coumn_name)]
    # Column 2 is addressed by position; the dict is wrapped in a one-element
    # list exactly as in the original assignment.
    messagesdf.iloc[sample_number,2]=[this_classified_msg_instance.predictions_dict]*1
    messagesdf.to_csv('./messagesdf.csv',index=False)
    pd.DataFrame({'sample_number':sample_number},index=[0]).to_csv('./sample_number.csv',index=False)
    display(this_predictions)
def display_common_widget():
    """Show the navigation buttons, a slice of ``messagesdf`` and the carousel.

    For the first samples (index <= 5) the first 9 rows are shown; afterwards
    the rows from ``sample_number-4`` up to ``sample_number+4``.
    """
    print(f'----- {sample_number} -----')
    display(widgets.HBox([previous_button,false_pred_button,true_pred_button, next_button]))
    if sample_number <= 5:
        display(messagesdf.head(9))
    else:
        display(messagesdf[sample_number-4:sample_number+5])
    display_sample_number_selector_carousal(sample_number)
####### Define onClick functions for buttons #########
def true_prediction_onclick(ipip):
    """Mark the current sample's prediction as accurate and move to the next one.

    Args:
        ipip: the clicked button (unused).
    """
    global sample_number,messagesdf
    clear_output(wait=True)
    ## inserting value in cell [index(messagesdf_index),isPredAccurate(coumn_name)]
    messagesdf.iloc[sample_number,3]=True
    # Stay on the last row instead of stepping past it: display_annotations()
    # looks the message up before it saves the CSV, so an out-of-range index
    # would raise there and the label just set would never be written.
    sample_number = min(sample_number + 1, max(len(messagesdf) - 1, 0))
    display_annotations()
    display_common_widget()
def false_prediction_onclick(ipip):
    """Mark the current sample's prediction as inaccurate and move to the next one.

    Args:
        ipip: the clicked button (unused).
    """
    global sample_number,messagesdf
    clear_output(wait=True)
    ## inserting value in cell [index(messagesdf_index),isPredAccurate(coumn_name)]
    messagesdf.iloc[sample_number,3]=False
    # Stay on the last row instead of stepping past it: display_annotations()
    # looks the message up before it saves the CSV, so an out-of-range index
    # would raise there and the label just set would never be written.
    sample_number = min(sample_number + 1, max(len(messagesdf) - 1, 0))
    display_annotations()
    display_common_widget()
def previous_button_onclick(ipip):
    """Step back one sample and re-render.

    Args:
        ipip: the clicked button (unused).
    """
    global sample_number
    # Clamp at the first row; a negative index would make the label-based
    # message lookup in display_annotations() fail.
    sample_number = max(sample_number - 1, 0)
    clear_output(wait=True)
    display_annotations()
    display_common_widget()
def next_button_onclick(ipip):
    """Step forward one sample and re-render.

    Args:
        ipip: the clicked button (unused).
    """
    global sample_number
    # Clamp at the last row of messagesdf so the lookup in display_annotations()
    # never runs past the end of the data.
    sample_number = min(sample_number + 1, max(len(messagesdf) - 1, 0))
    clear_output(wait=True)
    display_annotations()
    display_common_widget()
# In[ ]:
####### Define button instances ########
# Four navigation/labelling buttons, each wired to its handler defined above.
# NOTE(review): partial(handler) binds no arguments, so it behaves like passing
# the handler itself.
true_pred_button = widgets.Button(
description='True Prediction',
disabled=False,
button_style='success',
tooltip='Click me',
icon='check'
)
true_pred_button.on_click(partial(true_prediction_onclick))
false_pred_button = widgets.Button(
description='False Prediction',
disabled=False,
button_style='danger',
tooltip='Click me',
icon='close'
)
false_pred_button.on_click(partial(false_prediction_onclick))
previous_button = widgets.Button(
description='⏮️ Previous',
disabled=False,
button_style='info',
tooltip='Click me',
)
previous_button.on_click(partial(previous_button_onclick))
next_button = widgets.Button(
description='Next ⏭️',
disabled=False,
button_style='info',
tooltip='Click me',
)
next_button.on_click(partial(next_button_onclick))
# Initial render for the current sample_number.
display_annotations()
display_common_widget()
# # Reinforcement Learning For isPredAccurate
# In[ ]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
from keras.models import Sequential
from keras.layers import Dense, Embedding, Reshape
from keras.optimizers import Adam
from keras.utils import to_categorical
from rl.agents import SARSAAgent
from rl.memory import SequentialMemory
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction import FeatureHasher
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from scipy.sparse import hstack
import json
# Load and preprocess the data
import ast  # cell-local: parses the stringified prediction dicts read back from CSV

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of messagesdf.
df = messagesdf[['message','prediction','isPredAccurate']][:180].copy()
# Convert the 'prediction' column to a list of dictionaries.
# The cells hold the Python repr of a dict; literal_eval parses that directly,
# whereas swapping quotes and using json.loads breaks on any value containing
# a quote character.
df['prediction'] = df['prediction'].apply(ast.literal_eval)
# Bag-of-words features for the raw message text.
vectorizer = CountVectorizer()
X_message = vectorizer.fit_transform(df['message'])
# One feature per (label, value) pair of the prediction dict.
dict_vectorizer = DictVectorizer()
X_prediction = dict_vectorizer.fit_transform(df['prediction'])
# Concatenate both feature blocks column-wise and densify for Keras.
X = hstack((X_message, X_prediction))
X = X.toarray()
Y = df['isPredAccurate'].values
# Split the data into train and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
# Define the gym environment
class TokenPredictionEnv(gym.Env):
    """Episodic environment that walks once through a labelled feature matrix.

    Each observation is one row of ``X``; the action (0 or 1) is the agent's
    guess for that row's label in ``Y``. Reward is +1 for a correct guess and
    -1 otherwise. The episode ends after the last row.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.current_index = 0
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=0, high=1, shape=(X.shape[1],))

    def reset(self):
        """Rewind to the first row and return it as the initial observation."""
        self.current_index = 0
        return self.X[self.current_index]

    def step(self, action):
        """Score ``action`` against the current row's label, then advance.

        Returns:
            ``(observation, reward, done, info)``. Once the data is exhausted
            the last row is returned as a placeholder observation and ``done``
            is True.
        """
        if self.current_index >= self.X.shape[0]:
            # Stepped again after the episode ended: nothing left to score.
            return self.X[-1], 0, True, {}
        # Reward the action against the label of the row the agent just saw,
        # i.e. before the index moves on. Scoring after the increment would
        # compare the guess for row i with the label of row i+1.
        reward = 1 if action == self.Y[self.current_index] else -1
        self.current_index += 1
        done = self.current_index >= self.X.shape[0]
        observation = self.X[-1] if done else self.X[self.current_index]
        return observation, reward, done, {}
# Create the environment
env = TokenPredictionEnv(X_train, Y_train)
# Create the model
# NOTE(review): this rebinds the module-level name `model`, which
# ClassifiedMsgClass uses as the Hugging Face token classifier. After this cell
# runs, constructing ClassifiedMsgClass would call the Keras network instead -
# confirm whether a different name was intended.
model = Sequential()
# Input arrives as (1, n_features); the Reshape flattens it to (n_features,).
model.add(Reshape((X.shape[1],), input_shape=(1, X.shape[1])))
model.add(Dense(15, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(10, activation='relu'))
# Two outputs, one per action of env.action_space.
model.add(Dense(2, activation='softmax'))
# Create the agent
sarsa = SARSAAgent(model=model, nb_actions=env.action_space.n)
sarsa.compile(Adam(learning_rate=1e-4), metrics=['accuracy'])
sarsa.memory = SequentialMemory(limit=500000, window_length=1)
from livelossplot import PlotLossesKeras
# Create the LiveLossPlot callback
plot_losses = PlotLossesKeras()
# Train the agent
history = sarsa.fit(env, nb_steps=5000, verbose=0, callbacks=[plot_losses])
# Test the agent
# This run uses the training environment; the held-out run is in the next cell.
sarsa.test(env, nb_episodes=10, visualize=False)
# In[ ]:
# Create a new environment using the test data
test_env = TokenPredictionEnv(X_test, Y_test)
# Test the agent on the test data
sarsa.test(test_env, nb_episodes=10, visualize=False)
# In[ ]:
# NOTE(review): looks like a leftover debugging cell - `df` was sliced to
# rows [:180] above and its 'prediction' column was already converted from
# strings, so looking up label 182 and calling .replace on it presumably
# fails; confirm before relying on it.
df['prediction'][182].replace("'",'"')
# In[ ]:
# Load and preprocess the data
import ast  # cell-local: parses the stringified prediction dicts read back from CSV

# Rows 181..199 serve as unseen examples; .copy() keeps the column assignment
# below from writing into a slice of messagesdf.
df = messagesdf[['message','prediction','isPredAccurate']][181:200].copy()
# Convert the 'prediction' column to a list of dictionaries.
# literal_eval parses the Python dict repr directly; the quote-swapping
# json.loads approach breaks on any value that contains a quote character.
df['prediction'] = df['prediction'].apply(lambda x: ast.literal_eval(str(x)))
# Reuse the already-fitted vectorizers so the feature columns match training.
X_message = vectorizer.transform(df['message'])
X_prediction = dict_vectorizer.transform(df['prediction'])
X = hstack((X_message, X_prediction))
X = X.toarray()
# Add the length-1 middle axis expected by the network's input_shape=(1, n_features).
X = X.reshape(X.shape[0],1,X.shape[1])
Y = df['isPredAccurate'].values
# # Get the model's prediction for a single example
# X_example = X_test[0] # select the first example from the test set
# y_pred = sarsa.model.predict(X_example)
# Get the model's prediction for a batch of examples
# X_batch = X_test[:10] # select the first 10 examples from the test set
X_batch = X
y_pred = sarsa.model.predict(X_batch)
y_pred
``` explain this next part of this project to me.</div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"></path><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group bg-gray-50 dark:bg-[#444654]"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative h-[30px] w-[30px] p-1 rounded-sm text-white flex items-center justify-center" style="background-color: rgb(16, 163, 127);"><svg width="41" height="41" viewBox="0 0 41 41" fill="none" xmlns="http://www.w3.org/2000/svg" stroke-width="1.5" class="h-6 w-6"><path d="M37.5324 16.8707C37.9808 15.5241 38.1363 14.0974 37.9886 12.6859C37.8409 11.2744 37.3934 9.91076 36.676 8.68622C35.6126 6.83404 33.9882 5.3676 32.0373 4.4985C30.0864 3.62941 27.9098 3.40259 25.8215 3.85078C24.8796 2.7893 23.7219 1.94125 22.4257 1.36341C21.1295 0.785575 19.7249 0.491269 18.3058 0.500197C16.1708 0.495044 14.0893 1.16803 12.3614 2.42214C10.6335 3.67624 9.34853 5.44666 8.6917 7.47815C7.30085 7.76286 5.98686 8.3414 4.8377 9.17505C3.68854 10.0087 2.73073 11.0782 2.02839 12.312C0.956464 14.1591 0.498905 16.2988 0.721698 18.4228C0.944492 20.5467 1.83612 22.5449 3.268 
24.1293C2.81966 25.4759 2.66413 26.9026 2.81182 28.3141C2.95951 29.7256 3.40701 31.0892 4.12437 32.3138C5.18791 34.1659 6.8123 35.6322 8.76321 36.5013C10.7141 37.3704 12.8907 37.5973 14.9789 37.1492C15.9208 38.2107 17.0786 39.0587 18.3747 39.6366C19.6709 40.2144 21.0755 40.5087 22.4946 40.4998C24.6307 40.5054 26.7133 39.8321 28.4418 38.5772C30.1704 37.3223 31.4556 35.5506 32.1119 33.5179C33.5027 33.2332 34.8167 32.6547 35.9659 31.821C37.115 30.9874 38.0728 29.9178 38.7752 28.684C39.8458 26.8371 40.3023 24.6979 40.0789 22.5748C39.8556 20.4517 38.9639 18.4544 37.5324 16.8707ZM22.4978 37.8849C20.7443 37.8874 19.0459 37.2733 17.6994 36.1501C17.7601 36.117 17.8666 36.0586 17.936 36.0161L25.9004 31.4156C26.1003 31.3019 26.2663 31.137 26.3813 30.9378C26.4964 30.7386 26.5563 30.5124 26.5549 30.2825V19.0542L29.9213 20.998C29.9389 21.0068 29.9541 21.0198 29.9656 21.0359C29.977 21.052 29.9842 21.0707 29.9867 21.0902V30.3889C29.9842 32.375 29.1946 34.2791 27.7909 35.6841C26.3872 37.0892 24.4838 37.8806 22.4978 37.8849ZM6.39227 31.0064C5.51397 29.4888 5.19742 27.7107 5.49804 25.9832C5.55718 26.0187 5.66048 26.0818 5.73461 26.1244L13.699 30.7248C13.8975 30.8408 14.1233 30.902 14.3532 30.902C14.583 30.902 14.8088 30.8408 15.0073 30.7248L24.731 25.1103V28.9979C24.7321 29.0177 24.7283 29.0376 24.7199 29.0556C24.7115 29.0736 24.6988 29.0893 24.6829 29.1012L16.6317 33.7497C14.9096 34.7416 12.8643 35.0097 10.9447 34.4954C9.02506 33.9811 7.38785 32.7263 6.39227 31.0064ZM4.29707 13.6194C5.17156 12.0998 6.55279 10.9364 8.19885 10.3327C8.19885 10.4013 8.19491 10.5228 8.19491 10.6071V19.808C8.19351 20.0378 8.25334 20.2638 8.36823 20.4629C8.48312 20.6619 8.64893 20.8267 8.84863 20.9404L18.5723 26.5542L15.206 28.4979C15.1894 28.5089 15.1703 28.5155 15.1505 28.5173C15.1307 28.5191 15.1107 28.516 15.0924 28.5082L7.04046 23.8557C5.32135 22.8601 4.06716 21.2235 3.55289 19.3046C3.03862 17.3858 3.30624 15.3413 4.29707 13.6194ZM31.955 20.0556L22.2312 14.4411L25.5976 12.4981C25.6142 12.4872 25.6333 
12.4805 25.6531 12.4787C25.6729 12.4769 25.6928 12.4801 25.7111 12.4879L33.7631 17.1364C34.9967 17.849 36.0017 18.8982 36.6606 20.1613C37.3194 21.4244 37.6047 22.849 37.4832 24.2684C37.3617 25.6878 36.8382 27.0432 35.9743 28.1759C35.1103 29.3086 33.9415 30.1717 32.6047 30.6641C32.6047 30.5947 32.6047 30.4733 32.6047 30.3889V21.188C32.6066 20.9586 32.5474 20.7328 32.4332 20.5338C32.319 20.3348 32.154 20.1698 31.955 20.0556ZM35.3055 15.0128C35.2464 14.9765 35.1431 14.9142 35.069 14.8717L27.1045 10.2712C26.906 10.1554 26.6803 10.0943 26.4504 10.0943C26.2206 10.0943 25.9948 10.1554 25.7963 10.2712L16.0726 15.8858V11.9982C16.0715 11.9783 16.0753 11.9585 16.0837 11.9405C16.0921 11.9225 16.1048 11.9068 16.1207 11.8949L24.1719 7.25025C25.4053 6.53903 26.8158 6.19376 28.2383 6.25482C29.6608 6.31589 31.0364 6.78077 32.2044 7.59508C33.3723 8.40939 34.2842 9.53945 34.8334 10.8531C35.3826 12.1667 35.5464 13.6095 35.3055 15.0128ZM14.2424 21.9419L10.8752 19.9981C10.8576 19.9893 10.8423 19.9763 10.8309 19.9602C10.8195 19.9441 10.8122 19.9254 10.8098 19.9058V10.6071C10.8107 9.18295 11.2173 7.78848 11.9819 6.58696C12.7466 5.38544 13.8377 4.42659 15.1275 3.82264C16.4173 3.21869 17.8524 2.99464 19.2649 3.1767C20.6775 3.35876 22.0089 3.93941 23.1034 4.85067C23.0427 4.88379 22.937 4.94215 22.8668 4.98473L14.9024 9.58517C14.7025 9.69878 14.5366 9.86356 14.4215 10.0626C14.3065 10.2616 14.2466 10.4877 14.2479 10.7175L14.2424 21.9419ZM16.071 17.9991L20.4018 15.4978L24.7325 17.9975V22.9985L20.4018 25.4983L16.071 22.9985V17.9991Z" fill="currentColor"></path></svg></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap"><div class="markdown prose w-full break-words dark:prose-invert dark"><p>This script defines several functions and uses them to interactively curate true and falsified predictions from a dataset. 
The script loads the dataset from a CSV file and assigns it to a variable named <code>messagesdf</code>.</p><p>The first function defined is <code>carousel_box_color(i, sample_number)</code>, which takes as input an index <code>i</code> and a sample number <code>sample_number</code>, and returns a string that specifies the color of a button in a carousel. If <code>i</code> is equal to <code>sample_number</code>, the function returns 'danger', otherwise it returns 'warning'.</p><p>The next function is <code>carousel_on_sample_number_click(widget_data)</code>, which is called when a button in the carousel is clicked. This function updates the global variable <code>sample_number</code> to the value of the clicked button's index, calls the <code>display_annotations()</code> and <code>display_common_widget()</code> functions, and increments the <code>sample_number</code> by 1.</p><p>The function <code>carousal_number_range(sample_number)</code> takes a sample number as input and returns the range of numbers shown in the carousel: when the sample number is greater than 40, the 20 numbers before it through the 19 numbers after it; otherwise the fixed range 0 to 49.</p><p>The function <code>display_sample_number_selector_carousal(sample_number)</code> creates a carousel of buttons, with each button representing a sample</p></div></div></div><div class="text-gray-400 flex self-end lg:self-center justify-center mt-2 gap-4 lg:gap-1 lg:absolute lg:top-0 lg:translate-x-full lg:right-0 lg:mt-0 lg:pl-2 visible"><button class="p-1 rounded-md hover:bg-gray-100 hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M14 9V5a3 3 0 0 0-3-3l-4 9v11h11.28a2 2 0 0 0 2-1.7l1.38-9a2 2 0 0 0-2-2.3zM7 22H4a2 2 0 0 1-2-2v-7a2 2 0 0 1 2-2h3"></path></svg></button><button class="p-1 rounded-md hover:bg-gray-100 
hover:text-gray-700 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400"><svg stroke="currentColor" fill="none" stroke-width="2" viewBox="0 0 24 24" stroke-linecap="round" stroke-linejoin="round" class="h-4 w-4" height="1em" width="1em" xmlns="http://www.w3.org/2000/svg"><path d="M10 15v4a3 3 0 0 0 3 3l4-9V2H5.72a2 2 0 0 0-2 1.7l-1.38 9a2 2 0 0 0 2 2.3zm7-13h2.67A2.31 2.31 0 0 1 22 4v7a2.31 2.31 0 0 1-2.33 2H17"></path></svg></button></div></div></div></div><div class="w-full border-b border-black/10 dark:border-gray-900/50 text-gray-800 dark:text-gray-100 group dark:bg-gray-800"><div class="text-base gap-4 md:gap-6 m-auto md:max-w-2xl lg:max-w-2xl xl:max-w-3xl p-4 md:py-6 flex lg:px-0"><div class="w-[30px] flex flex-col relative items-end"><div class="relative flex"><span style="box-sizing: border-box; display: inline-block; overflow: hidden; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; position: relative; max-width: 100%;"><span style="box-sizing: border-box; display: block; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px; max-width: 100%;"><img alt="" aria-hidden="true" src="data:image/svg+xml,%3csvg%20xmlns=%27http://www.w3.org/2000/svg%27%20version=%271.1%27%20width=%2730%27%20height=%2730%27/%3e" style="display: block; max-width: 100%; width: initial; height: initial; background: none; opacity: 1; border: 0px; margin: 0px; padding: 0px;"></span><img alt="Hemang Joshi" srcset="/_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=32&q=75 1x, /_next/image?url=https%3A%2F%2Flh3.googleusercontent.com%2Fa%2FAEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA%3Ds96-c&w=64&q=75 2x" src="./telegram_message_download Script_files/AEdFTp4Iivkxu9DEtmTKT84x0yqTgiLRm1mSF09uLaUrhA=s96-c.jpeg" decoding="async" data-nimg="intrinsic" class="rounded-sm" 
style="position: absolute; inset: 0px; box-sizing: border-box; padding: 0px; border: none; margin: auto; display: block; width: 0px; height: 0px; min-width: 100%; max-width: 100%; min-height: 100%; max-height: 100%;"></span></div></div><div class="relative flex w-[calc(100%-50px)] md:flex-col lg:w-[calc(100%-115px)]"><div class="flex flex-grow flex-col gap-3"><div class="min-h-[20px] flex flex-col items-start gap-4 whitespace-pre-wrap">```
# Ground-truth isPredAccurate values for the rows predicted above.
Y
# In[ ]:
# True where the second output column is larger than the first.
y_pred[:,0] < y_pred[:,1]
# In[ ]:
# Index of the larger output per row, i.e. the predicted action (0 or 1).
predicted_labels = np.argmax(y_pred, axis=1)
predicted_labels
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
# # Backtest agaist real data
# In[ ]:
########### IMPORT LIBRARIES ##############
import datetime
# pd.set_option('plotting.backend', 'pandas_bokeh')
# import pandas_bokeh
# pandas_bokeh.output_notebook()
from kiteconnect import exceptions
import login_new
# Reuse a stored kite session if one can be loaded and queried; otherwise log in again.
# NOTE(review): the original indentation was lost in this paste, so the nesting
# of the final display(...) call (inside the except branch or after it) cannot
# be determined from here - confirm against the notebook.
# NOTE(review): joblib.load unpickles the file; only load a kitefile.p you created yourself.
try:
kite = joblib.load('./kitefile.p')
display(kite.profile()['user_name'])
except Exception as tknxcp:
print(tknxcp)
# NOTE(review): the meaning of the hard-coded argument 446505 is not visible here.
kite = login_new.login_haj_new(446505)
display(kite.profile()['user_name'])
# In[ ]:
# Keep only fully populated rows whose prediction was marked accurate.
droppedna = messagesdf.dropna()
# .copy() gives an independent frame, so the column update below does not
# write into a filtered view of messagesdf.
true_predictions_df = droppedna.loc[droppedna.isPredAccurate].copy()
#### Time correct Time column
# NOTE(review): shifts every timestamp by 5 h 29 min; if this is meant to
# convert UTC to IST (UTC+5:30) it is one minute short - confirm intent.
true_predictions_df['time'] = true_predictions_df['time'] + datetime.timedelta(minutes=29, hours=5)
true_predictions_df
# In[ ]: