forked from Gizra/csv2sql
-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv2sql.drush.inc
289 lines (252 loc) · 7.8 KB
/
csv2sql.drush.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
<?php
/**
* @file
* Convert CSV to SQL and import to the Drupal instance.
*/
use Drupal\Component\Render\FormattableMarkup;
use Drupal\Core\Database\Database;
/**
* Implements hook_drush_command().
*/
function csv2sql_drush_command() {
$items = array();
$items['csv2sql'] = array(
'description' => 'Convert CSV to SQL and import to the Drupal instance.',
'examples' => array(
'drush csv2sql /path/to/someFile.csv' => 'Converts the someFile.csv to an SQL table.',
),
'arguments' => array(
'path' => 'The path to the CSV file.',
),
'required-arguments' => TRUE,
'options' => array(
'prefix' => 'the prefix of the table. Defaults to "_raw".',
'limit' => 'the number of rows to convert into the SQL table. Defaults to unlimited.',
),
);
return $items;
}
/**
* Implements drush_hook_command().
*
* Command callback for csv2sql.
*
* @param string $csv_path
* Path to the CSV file.
*/
function drush_csv2sql($csv_path) {
$prefix = drush_get_option('prefix', '_raw');
$limit = intval(drush_get_option('limit', FALSE));
if ($limit && !is_int($limit)) {
drush_print('Limit should be an integer.');
return;
}
$files = array();
// If the path given is a directory then scan the directory for CSV files
// otherwise check if it's a file and add it as the only file.
if (is_dir($csv_path)) {
$files = csv2sql_scan_for_csv_files($csv_path);
}
elseif (file_exists($csv_path)) {
$files[] = $csv_path;
}
if (empty($files)) {
drush_print('Please provide either a path to your CSV file or to a directory containing CSV files".');
return;
}
foreach ($files as $csv_file) {
$path_info = pathinfo($csv_file);
$table_name = $prefix . '_' . $path_info['filename'];
drush_print(sprintf('Starting to convert file %s to table %s.', $path_info['filename'], $table_name));
if (($handle = fopen($csv_file, 'r')) !== FALSE) {
$row_number = 0;
$first_row = TRUE;
while ((($data = fgetcsv($handle, 0, ',')) !== FALSE) && (!$limit || $limit > $row_number)) {
if ($first_row) {
$first_row = FALSE;
// Create the table.
$headers = csv2sql_create_db($table_name, $data);
continue;
}
// Insert rows.
$row = array();
foreach ($data as $delta => $value) {
$header_col = $headers[$delta];
$row[$header_col] = str_replace('\"', '"', $value);
}
csv2sql_insert_row_to_table($table_name, $row);
$row_number++;
}
fclose($handle);
}
}
}
/**
* Create a DB.
*
* @param string $table_name
* The table name.
* @param array $header
* Array with the column names and definition that was extracted from the CSV.
* @param bool $drop_existing
* Determines if an existing table should be dropped. Defaults to TRUE.
*
* @return array
* Array with the column names.
*/
function csv2sql_create_db($table_name, $header = array(), $drop_existing = TRUE) {
// Add a serial key as the first column.
$fields_info = array(
'__id' => array(
'type' => 'serial',
'not null' => TRUE,
'description' => 'Primary Key: Numeric ID.',
),
);
$first_col = TRUE;
$index_columns = array();
// The default index for each table.
$primary = array('__id');
// Get the column properties.
foreach ($header as $col) {
$header_info = explode('|', $col);
$col_info = array();
$col_name = csv2sql_get_column_name($header_info[0]);
// Add the first column to index.
if ($first_col && !in_array('index:false', array_map('strtolower', $header_info))) {
$index_columns[$col_name] = array($col_name);
}
// Allow passing complex headers,
// e.g. "Amount|type:int|length:11|not null:false"
// Which will translate to a column "amount" of type int(11) and NULL value
// is allowed.
// Index the first column in the table as a default,
// unless stated otherwise in the column header (index:FALSE),
// Add other columns to index if stated in the header (index:TRUE).
if (!empty($header_info[1])) {
$properties = $header_info;
// Remove the column name.
unset($properties[0]);
foreach ($properties as $property) {
list($key, $value) = explode(':', $property);
// Check which columns needs to be added to index.
// Index can't go into the field properties
// (It's added to table's properties).
if (strtolower($key) == 'index') {
// Add to index only if index is set to TRUE and it's not the
// first column.
if (strtoupper($value) == 'TRUE' && !$first_col) {
// Add the column to the table index.
$index_columns[$col_name] = array($col_name);
}
}
else {
$col_info[$key] = $value;
}
}
}
if (empty($col_info)) {
// Add default values.
$col_info = array(
'description' => '',
'type' => 'varchar',
'length' => 255,
'not null' => TRUE,
'default' => '',
);
}
$fields_info[$col_name] = $col_info;
$first_col = FALSE;
}
$schema = Database::getConnection()->schema();
if ($drop_existing) {
// Drop existing table.
$schema->dropTable($table_name);
}
$table_schema = array(
'fields' => $fields_info,
'primary key' => $primary,
'indexes' => $index_columns,
);
$schema->createTable($table_name, $table_schema);
$headers = array_keys($fields_info);
// Remove the ID key.
unset($headers[0]);
return array_values($headers);
}
/**
* Insert a single row to the table.
*
* @param string $table_name
* The table name.
* @param array $row
* The row to insert.
*
* @return bool
* TRUE if the insert operation was successful.
*/
function csv2sql_insert_row_to_table($table_name, $row) {
try {
return Database::getConnection()
->insert($table_name)
->fields($row)
->execute();
}
catch (Exception $exception) {
drush_log(sprintf('Error in row #%d: %s', $row['__id'], $exception->getMessage()), 'error');
return FALSE;
}
}
/**
* Get a machine readable name from the column name.
*
* In order to prevent having MYSQL reserved words, we prefix all the columns
* with an underscore.
*
* @param string $col_name
* The column name.
*
* @return string
* The trimmed, lower-cased and underscored name of the column.
*/
function csv2sql_get_column_name($col_name) {
$pattern = '/[^a-zA-Z0-9\/]+/';
$col_name = preg_replace($pattern, '_', $col_name);
return '_' . trim(strtolower(str_replace(array('-', ' ', '/'), '_', $col_name)));
}
/**
* Scan a directory for csv files.
*
* Scans the directory given by the user in the command,
* Scans all the sub-directories of that directory, searches for files with
* 'csv' extension to be added to the convert to SQL process.
*
* @param string $dir
* The path of the directory provided in the command.
*
* @return array
* An array of 'csv' files found in the directory.
*/
function csv2sql_scan_for_csv_files($dir) {
$csv_files = array();
// If the directory does not exist, print a message, stop the function.
if (!file_exists($dir)) {
drush_print('Given directory does not exist.');
return FALSE;
}
// Add a trailing slash and a * so the glob returns everything in the
// directory, trim trailing slash and add them here, To make sure we
// never have '//'.
$items_in_dir = glob(rtrim($dir, '/') . DIRECTORY_SEPARATOR . '*');
foreach ($items_in_dir as $item) {
// If current item is a 'csv' file, add it to the list of files.
if (pathinfo($item, PATHINFO_EXTENSION) == 'csv') {
$csv_files[] = $item;
}
// If current item is a directory, scan again.
if (is_dir($item)) {
$csv_files = array_merge($csv_files, csv2sql_scan_for_csv_files($item));
}
}
return $csv_files;
}