Skip to content

Commit

Permalink
change schema
Browse files Browse the repository at this point in the history
  • Loading branch information
fozbek committed May 25, 2021
1 parent 75d3cf7 commit d7477d8
Show file tree
Hide file tree
Showing 12 changed files with 3,297 additions and 254 deletions.
33 changes: 10 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $url = 'https://google.com';
$schema = [
'title' => 'title',
'a-tags' => [
'selector' => 'a',
'list-selector' => 'a',
'content' => [
'text' => 'a',
'url' => 'a@href',
Expand Down Expand Up @@ -47,9 +47,9 @@ Response (Formatted)
}

#### Examples
>You can test all of these in any xenforo forum. Example url: https://xenforo.com/community/forums/announcements/
>You can test all of these on any site that uses XenForo. Example URL: https://xenforo.com/community/forums/announcements/
- Scrape single selector
- Single selector
```php
$schema = [
'forum-title' => '.p-body-header .p-title-value'
Expand All @@ -60,7 +60,7 @@ $schema = [
```php
$schema = [
'threads' => [
'selector' => '.structItem--thread',
'list-selector' => '.structItem--thread',
'content' => [
'thread-title' => '.structItem-title',
'thread-url' => '.structItem-title a@href',
Expand All @@ -70,22 +70,12 @@ $schema = [
];
```

- Pagination
```php
$schema = [
'title' => 'title',
'pagination' => [
'limit' => 3,
'selector' => '.pageNav-jump--next@href',
],
];
```

- New Request
```php
$schema = [
'login-page' => [
'selector' => 'a.p-navgroup-link--logIn@href',
'request-selector' => 'a.p-navgroup-link--logIn@href',
'base-url' => 'https://xenforo.com',
'content' => [
'title' => 'title',
],
Expand All @@ -98,19 +88,16 @@ $schema = [
$schema = [
'title' => 'title',
'threads' => [
'selector' => '.structItem--thread',
'list-selector' => '.structItem--thread',
'content' => [
'thread-detail' => [
'selector' => '.structItem-title a@href',
'request-selector' => '.structItem-title a@href',
'base-url' => 'https://xenforo.com',
'content' => [
'thread-content' => '.message-body .bbWrapper',
],
],
],
],
'pagination' => [
'limit' => 3,
'selector' => '.pageNav-jump--next@href',
],
]
];
```
8 changes: 5 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@
"email": "mail@fatih.dev"
}
],
"description": "Simple, schema-based scraping tool",
"autoload": {
"psr-4": {
"Scrawler\\": "src/"
}
},
"require": {
"symfony/dom-crawler": "^5.0",
"symfony/css-selector": "^5.0",
"guzzlehttp/guzzle": "~6.0|^7.0.1"
"guzzlehttp/guzzle": "~6.0|^7.0.1",
"imangazaliev/didom": "^1.16",
"ext-dom": "*"
},
"require-dev": {
"roave/security-advisories": "dev-latest",
"phpstan/phpstan": "^0.12.34",
"phpunit/phpunit": "^9"
},
Expand Down
Loading

0 comments on commit d7477d8

Please sign in to comment.