Skip to content

Commit

Permalink
Init
Browse files Browse the repository at this point in the history
  • Loading branch information
Artur authored and kompus committed Jun 12, 2018
1 parent 8d6a2ac commit c6b67fb
Show file tree
Hide file tree
Showing 42 changed files with 2,526 additions and 1 deletion.
68 changes: 67 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,67 @@
# twitter-saver
# twitter-saver

## Description

### Info
The program was created for *Team project - development of data analysis system* course run by [@pbiecek](https://github.com/pbiecek) at Warsaw University of Technology.

### Program description

The program downloads tweets from Twitter for a given user or keyword. Downloading can be done in two modes:

* stream
* history

### Further information

Full specification and more detailed description of summarization features (in Polish) can be found in [this file](https://github.com/minorczyka/twitter-saver/blob/master/docs/Instrukcja%20obs%C5%82ugi.pdf).

## Installation

1. Download the binary files from the latest release
2. Prepare `config.yaml`
3. Run binaries `twitter-saver` and `web`

## Usage

### Running

The programs can be run from the command line with the following arguments:

* `--config` - path to config file

The config file is stored in YAML format. It contains the following information:

- `db`:
  - `host`
  - `port`
  - `user`
  - `password`
  - `dbName` - database name in which data will be stored
  - `sslMode` - `enable` or `disable`
- `web` - web interface parameters:
  - `port` - port on which server will be working
  - `secret` - private key used to sign session identifiers. Should be random and renewed periodically. Keys shorter than 256 bits are not recommended.
  - `users` - sequence of user accounts. Each account consists of:
    - `username`
    - `password`
- `twitter` - twitter API keys:
  - `consumerKey`
  - `consumerSecret`
  - `token`
  - `tokenSecret`
- `json` - defines additional fields from tweet saved in database
  - `all` - saves whole tweet content
  - `fields` - sequence of field names to be stored
- `autoDeleteDays` - number of days after which data will be automatically removed

### Screenshots and live examples

Screenshot of project UI:
![the screenshot](https://github.com/minorczyka/twitter-saver/blob/master/misc/screenshots/screen1.png)

## Authors

* Piotr Krzeszewski
* Łukasz Ławniczak
* Artur Minorczyk
97 changes: 97 additions & 0 deletions core/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package core

import (
"errors"
"fmt"

"github.com/dghubble/go-twitter/twitter"
"github.com/dghubble/oauth1"
)

// DbConfig holds the database connection settings from the `db` section of
// the config file. All fields are pointers; ConnectionString reports an error
// for any field that is nil.
type DbConfig struct {
// Host, Port, User and Password are substituted into the host, port, user
// and password keywords of the connection string.
Host *string
Port *string
User *string
Password *string
// DbName is the name of the database in which data will be stored.
DbName *string `yaml:"dbName"`
// SslMode is substituted into the sslmode keyword of the connection string.
// NOTE(review): the README documents `enable` or `disable`; confirm that the
// database driver accepts both values.
SslMode *string `yaml:"sslMode"`
}

// TwitterConfig holds the Twitter API keys from the `twitter` section of the
// config file. TwitterClient reports an error for any field that is nil.
type TwitterConfig struct {
// ConsumerKey and ConsumerSecret are passed to oauth1.NewConfig.
ConsumerKey *string `yaml:"consumerKey"`
ConsumerSecret *string `yaml:"consumerSecret"`
// Token and TokenSecret are passed to oauth1.NewToken.
Token *string
TokenSecret *string `yaml:"tokenSecret"`
}

// JsonConfig describes which additional tweet fields are saved in the
// database (the `json` section of the config file).
type JsonConfig struct {
// All requests saving the whole tweet content.
All bool
// Fields is the sequence of field names to be stored.
Fields []string
}

// UserAccount is one entry of the `users` sequence of the web configuration.
// WebAccounts reports an error when either field is nil.
type UserAccount struct {
Username *string
Password *string
}

// WebConfig holds the web interface parameters from the `web` section of the
// config file.
type WebConfig struct {
// Port is the port on which the server will be working.
Port *int
// Secret is the private key used to sign session identifiers.
Secret *string
// Users is the sequence of user accounts; see WebAccounts for the
// username-to-password map built from it.
Users []UserAccount
}

// ConnectionString builds the keyword/value connection string
// ("host=... port=... user=... dbname=... password=... sslmode=...") from the
// configured database settings.
//
// It returns an error naming the first setting that is nil, checked in the
// order host, port, user, password, database name, SSL mode.
//
// Values that are empty or contain anything other than plain printable ASCII
// (for example a password with a space, a single quote or a backslash) are
// single-quoted and escaped so that they cannot break the surrounding
// keyword/value syntax; ordinary values are emitted unchanged.
func (d *DbConfig) ConnectionString() (string, error) {
	required := []struct {
		value   *string
		missing string
	}{
		{d.Host, "Database host not specified."},
		{d.Port, "Database port not specified."},
		{d.User, "Database user not specified."},
		{d.Password, "Database password not specified."},
		{d.DbName, "Database name not specified."},
		{d.SslMode, "Database ssl mode not specified."},
	}
	for _, field := range required {
		if field.value == nil {
			return "", errors.New(field.missing)
		}
	}
	return fmt.Sprintf("host=%s port=%s user=%s dbname=%s password=%s sslmode=%s",
		quoteConnValue(*d.Host), quoteConnValue(*d.Port), quoteConnValue(*d.User),
		quoteConnValue(*d.DbName), quoteConnValue(*d.Password), quoteConnValue(*d.SslMode)), nil
}

// quoteConnValue returns value in a form that is safe to place after
// "keyword=" in a keyword/value connection string. Values made only of
// printable, non-space ASCII without quotes or backslashes are returned
// unchanged; everything else (including the empty string, which would
// otherwise swallow the next keyword) is wrapped in single quotes with
// embedded single quotes and backslashes escaped by a backslash.
func quoteConnValue(value string) string {
	plain := value != ""
	for i := 0; plain && i < len(value); i++ {
		c := value[i]
		if c <= ' ' || c >= 0x7f || c == '\'' || c == '\\' {
			plain = false
		}
	}
	if plain {
		return value
	}

	quoted := make([]byte, 0, len(value)+2)
	quoted = append(quoted, '\'')
	for i := 0; i < len(value); i++ {
		// Both escapable characters are ASCII, so byte-wise scanning is safe
		// for multi-byte UTF-8 input.
		if c := value[i]; c == '\'' || c == '\\' {
			quoted = append(quoted, '\\')
		}
		quoted = append(quoted, value[i])
	}
	quoted = append(quoted, '\'')
	return string(quoted)
}

// TwitterClient creates a Twitter API client authenticated with the
// configured OAuth1 consumer key/secret and access token/secret.
//
// It returns an error naming the first credential that is nil, checked in the
// order consumer key, consumer secret, token, token secret.
func (t *TwitterConfig) TwitterClient() (*twitter.Client, error) {
	switch {
	case t.ConsumerKey == nil:
		return nil, errors.New("Twitter consumer key not specified.")
	case t.ConsumerSecret == nil:
		return nil, errors.New("Twitter consumer secret not specified.")
	case t.Token == nil:
		return nil, errors.New("Twitter token not specified.")
	case t.TokenSecret == nil:
		return nil, errors.New("Twitter token secret not specified.")
	}

	oauthConfig := oauth1.NewConfig(*t.ConsumerKey, *t.ConsumerSecret)
	accessToken := oauth1.NewToken(*t.Token, *t.TokenSecret)
	return twitter.NewClient(oauthConfig.Client(oauth1.NoContext, accessToken)), nil
}

// WebAccounts converts the configured user accounts into a map from username
// to password. When a username occurs more than once, the later entry wins.
//
// It returns an error as soon as it meets an account whose username or
// password is nil.
func (w *WebConfig) WebAccounts() (map[string]string, error) {
	accounts := make(map[string]string, len(w.Users))
	for i := range w.Users {
		account := &w.Users[i]
		switch {
		case account.Username == nil:
			return nil, errors.New("Username not specified for account.")
		case account.Password == nil:
			return nil, errors.New("Password not specified for account.")
		}
		accounts[*account.Username] = *account.Password
	}
	return accounts, nil
}
22 changes: 22 additions & 0 deletions core/connect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package core

import (
"log"

"github.com/jinzhu/gorm"
_ "github.com/jinzhu/gorm/dialects/postgres"
)

// Connect opens a PostgreSQL connection using connectionString, auto-migrates
// the Tweet and Object models and adds the foreign key from the tweet table's
// object_id column to objects(id) (ON DELETE CASCADE, ON UPDATE NO ACTION).
//
// A failure to open the connection terminates the process via log.Fatal.
// Migration and foreign-key errors are logged but do not abort start-up, so
// the returned handle is usable whenever the connection itself succeeded.
func Connect(connectionString string) *gorm.DB {
	db, err := gorm.Open("postgres", connectionString)
	if err != nil {
		log.Fatal(err)
	}

	// gorm reports schema errors on the returned handle rather than as a
	// return value; surface them instead of dropping them silently.
	if err := db.AutoMigrate(&Tweet{}).Error; err != nil {
		log.Printf("auto-migrating Tweet: %v", err)
	}
	if err := db.AutoMigrate(&Object{}).Error; err != nil {
		log.Printf("auto-migrating Object: %v", err)
	}

	if err := db.Model(&Tweet{}).AddForeignKey("object_id", "objects(id)", "CASCADE", "NO ACTION").Error; err != nil {
		log.Printf("adding Tweet.object_id foreign key: %v", err)
	}

	return db
}
44 changes: 44 additions & 0 deletions core/models.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package core

import (
"time"
)

// Tweet is the database model for a single saved tweet. Connect auto-migrates
// it and links ObjectId to objects(id) with a cascading delete.
type Tweet struct {
// ID is the primary key of the row.
ID uint `gorm:"primary_key"`
CreatedAt time.Time

// TweetId, PublishedAt, UserId and Text are required (NOT NULL) columns.
// NOTE(review): TweetId and UserId are presumably the identifiers assigned
// by Twitter — confirm against the code that fills them.
TweetId int64 `gorm:"not null"`
PublishedAt time.Time `gorm:"not null"`
UserId int64 `gorm:"not null"`
// Text is additionally indexed.
Text string `gorm:"not null" sql:"index"`
// ExtendedInfo is optional.
// NOTE(review): presumably holds the extra tweet fields selected by the
// `json` config section — confirm.
ExtendedInfo string
// ObjectId refers to the Object this tweet belongs to.
ObjectId uint
}

// ObjectSource identifies the download mode an Object is collected with.
type ObjectSource int32

// Download modes. The numeric values are written out explicitly so that they
// stay stable if the declarations are ever reordered.
const (
	// HistorySource marks objects downloaded in history mode.
	HistorySource ObjectSource = 1
	// StreamSource marks objects downloaded in stream mode.
	StreamSource ObjectSource = 2
)

// ObjectType identifies what an Object's query refers to. It is declared as
// an alias of int32 (not a distinct type), so plain int32 values are
// interchangeable with it.
type ObjectType = int32

// Query kinds. The numeric values are written out explicitly so that they
// stay stable if the declarations are ever reordered.
const (
	// UserType marks objects whose query is a user.
	UserType ObjectType = 1
	// KeywordType marks objects whose query is a keyword.
	KeywordType ObjectType = 2
)

// Object is the database model for one tracked user or keyword together with
// the tweets saved for it. Connect auto-migrates it, and tweets reference it
// through their object_id column.
type Object struct {
// ID is the primary key of the row.
ID uint `gorm:"primary_key"`
CreatedAt time.Time
// DeletedAt is indexed and nil by default.
// NOTE(review): presumably gorm's soft-delete marker (FindAllObjects uses
// Unscoped to include such rows) — confirm.
DeletedAt *time.Time `sql:"index"`

// Source is the download mode (HistorySource or StreamSource).
Source ObjectSource `gorm:"not null"`
// Type is the kind of query (UserType or KeywordType).
Type ObjectType `gorm:"not null"`
// Query is the required query text.
// NOTE(review): presumably the user name or keyword to download — confirm.
Query string `gorm:"not null"`
// HistoryFrom is optional.
// NOTE(review): presumably the earliest date to fetch in history mode —
// confirm.
HistoryFrom *time.Time
// HistoryDone is the flag written by UpdateObjectHistory.
HistoryDone bool
// Tweets are the tweets associated with this object.
Tweets []Tweet
}
33 changes: 33 additions & 0 deletions core/object_dao.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package core

import (
"github.com/jinzhu/gorm"
)

// FindStreamObjects returns the objects whose Source is StreamSource.
// Any query error is discarded; only the slice filled by the query is
// returned.
func FindStreamObjects(db *gorm.DB) []Object {
	var found []Object
	filter := &Object{Source: StreamSource}
	db.Where(filter).Find(&found)
	return found
}

// CountStreamObjects returns the number of objects whose source column equals
// StreamSource. Any query error is discarded, in which case the count keeps
// whatever value the query left in it (zero if untouched).
func CountStreamObjects(db *gorm.DB) int {
	var total int
	streamObjects := db.Model(&Object{}).Where("source = ?", StreamSource)
	streamObjects.Count(&total)
	return total
}

// FindHistoryObjects returns the objects whose Source is HistorySource.
// Any query error is discarded; only the slice filled by the query is
// returned.
func FindHistoryObjects(db *gorm.DB) []Object {
	var found []Object
	filter := &Object{Source: HistorySource}
	db.Where(filter).Find(&found)
	return found
}

// FindAllObjects returns every object, querying through an unscoped handle.
// Any query error is discarded; only the slice filled by the query is
// returned.
func FindAllObjects(db *gorm.DB) []Object {
	var all []Object
	unscoped := db.Unscoped()
	unscoped.Find(&all)
	return all
}

// UpdateObjectHistory writes history to the history_done column of the given
// object. Any error from the update is discarded.
func UpdateObjectHistory(db *gorm.DB, object *Object, history bool) {
	target := db.Model(object)
	target.Update("history_done", history)
}
Loading

0 comments on commit c6b67fb

Please sign in to comment.