-
Notifications
You must be signed in to change notification settings - Fork 1
/
CPE_parser.py
63 lines (45 loc) · 1.23 KB
/
CPE_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 10 21:05:00 2022
@author: IIT
"""
import pandas as pd
#%% getting vendor
# Token the input CSV uses to pack several CPE strings into one 'cpe23Uri' cell.
CPE_SEPARATOR = '#arman#'


def _unique_cpe_fields(cpe_cell):
    """Return the distinct vendors and products listed in one 'cpe23Uri' cell.

    The cell holds one or more CPE strings joined by ``CPE_SEPARATOR``.  Each
    string is split on ':' and fields 3 and 4 are taken as vendor and product
    (the layout ``cpe:2.3:<part>:<vendor>:<product>:...``).

    Args:
        cpe_cell: Raw string value of a 'cpe23Uri' cell.

    Returns:
        A ``(vendors, products)`` tuple of comma-joined strings, each with
        duplicates removed and first-seen order preserved.

    Raises:
        IndexError: If any CPE string has fewer than five ':'-separated fields.
    """
    # Dicts keep insertion order, so they act as ordered sets here: first-seen
    # order is preserved without a linear "not in list" scan per entry.
    vendors = {}
    products = {}
    for cpe in cpe_cell.split(CPE_SEPARATOR):
        fields = cpe.split(':')
        # NOTE(review): a plain split does not honour escaped colons ('\:')
        # inside a field -- confirm the input never contains them.
        vendors[fields[3]] = None
        products[fields[4]] = None
    return ','.join(vendors), ','.join(products)


# Keep only the columns this script needs; rows missing any of them are dropped.
data = pd.read_csv('../data/output/all_data_2019.csv')
data = data[['ID','cpe23Uri','url']]
data = data.dropna()
lst = data['cpe23Uri'].values.tolist()

# One comma-joined vendor string and one product string per remaining row.
# An empty frame simply yields empty lists (no indexing of lst[0] up front).
vendor_lst = []
soft_lst = []
for item in lst:
    vendors, products = _unique_cpe_fields(item)
    vendor_lst.append(vendors)
    soft_lst.append(products)

# Start from the ID column so df keeps data's row index; the lists are
# assigned positionally and have exactly one entry per row.
df = data[['ID']].copy()
df['vendor'] = vendor_lst
df['software'] = soft_lst
#%% parsing git
# Flag each row whose URL field mentions GitHub: 'yes' when the substring
# 'github' occurs anywhere in the value, None otherwise (written as an empty
# cell in the CSV).
url_lst = data['url'].tolist()
git_list = ['yes' if 'github' in link else None for link in url_lst]
df['github'] = git_list

# Persist ID / vendor / software / github without the pandas row index.
df.to_csv('vedor_info.csv', index=False)