Skip to content
This repository has been archived by the owner on Mar 30, 2023. It is now read-only.

Commit

Permalink
fix conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
tkmru committed Nov 12, 2018
2 parents 9c74e41 + bab8da3 commit 8226057
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 376 deletions.
37 changes: 30 additions & 7 deletions README.md
Expand Up @@ -25,36 +25,37 @@ Some of the benefits of using Twint vs Twitter API:
## Installing

### Stable version
**Pip:**
**Pip:**
```bash
pip3 install twint
```
```

**Pipenv**:
**Pipenv**:
```bash
pipenv install twint
```

### Development version
**Git:**
**Git:**
```bash
git clone https://github.com/twintproject/twint.git
pip3 install -r requirements.txt
```

**Pip:**
**Pip:**
```bash
pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint
```
```

**Pipenv**:
**Pipenv**:
```bash
pipenv install -e git+https://github.com/twintproject/twint.git#egg=twint
```

## CLI Basic Examples and Combos
A few simple examples to help you understand the basics:

<<<<<<< HEAD
- `twint -u username` - Scrape all the Tweets from *user*'s timeline.
- `twint -u username -s pineapple` - Scrape all Tweets from the *user*'s timeline containing _pineapple_.
- `twint -s pineapple` - Collect every Tweet containing *pineapple* from everyone's Tweets.
Expand All @@ -75,6 +76,28 @@ A few simple examples to help you understand the basics:
- `twint -u username --profile-full` - Use a slow, but effective method to gather Tweets from a user's profile (Gathers ~3200 Tweets, Including Retweets).
- `twint -u username --retweets` - Use a quick method to gather the last 900 Tweets (that includes retweets) from a user's profile.
- `twint -u username --resume 10940389583058` - Resume a search starting from the specified Tweet ID.
=======
- `python3 Twint.py -u username` - Scrape all the Tweets from *user*'s timeline.
- `python3 Twint.py -u username -s pineapple` - Scrape all Tweets from the *user*'s timeline containing _pineapple_.
- `python3 Twint.py -s pineapple` - Collect every Tweet containing *pineapple* from everyone's Tweets.
- `python3 Twint.py -u username --year 2014` - Collect Tweets that were tweeted **before** 2014.
- `python3 Twint.py -u username --since 2015-12-20` - Collect Tweets that were tweeted since 2015-12-20.
- `python3 Twint.py -u username -o file.txt` - Scrape Tweets and save to file.txt.
- `python3 Twint.py -u username -o file.csv --csv` - Scrape Tweets and save as a csv file.
- `python3 Twint.py -u username --email --phone` - Show Tweets that might have phone numbers or email addresses.
- `python3 Twint.py -s "Donald Trump" --verified` - Display Tweets by verified users that Tweeted about Donald Trump.
- `python3 Twint.py -g="48.880048,2.385939,1km" -o file.csv --csv` - Scrape Tweets from a radius of 1km around a place in Paris and export them to a csv file.
- `python3 Twint.py -u username -es localhost:9200` - Output Tweets to Elasticsearch.
- `python3 Twint.py -u username -o file.json --json` - Scrape Tweets and save as a json file.
- `python3 Twint.py -u username --database tweets.db` - Save Tweets to a SQLite database.
- `python3 Twint.py -u username --followers` - Scrape a Twitter user's followers.
- `python3 Twint.py -u username --following` - Scrape who a Twitter user follows.
- `python3 Twint.py -u username --favorites` - Collect all the Tweets a user has favorited.
- `python3 Twint.py -u username --following --user-full` - Collect full user information for everyone a person follows.
- `python3 Twint.py -u username --profile-full` - Use a slow, but effective method to gather Tweets from a user's profile (Gathers ~3200 Tweets, Including Retweets).
- `python3 Twint.py -u username --retweets` - Use a quick method to gather the last 900 Tweets (that includes retweets) from a user's profile.
- `python3 Twint.py -u username --resume 10940389583058` - Resume a search starting from the specified Tweet ID.
>>>>>>> bab8da34f77dcbf15ec46068e3e2a9301a2a4c21
More details about the commands and options are located in the [wiki](https://github.com/twintproject/twint/wiki/Commands)

Expand Down
22 changes: 8 additions & 14 deletions Twint.py
Expand Up @@ -29,14 +29,14 @@ def check(args):
if args.userid:
error("Contradicting Args",
"--userid and -u cannot be used together.")
else:
if args.search is None:
error("Error", "Please use at least -u or -s.")
if args.output is None:
if args.csv:
error("Error", "Please specify an output file (Example: -o file.csv).")
elif args.json:
error("Error", "Please specify an output file (Example: -o file.json).")
if args.hostname:
if args.Database is None or args.DB_user is None or args.DB_pwd is None:
error("Error", "Please specify database name, user and password")

def loadUserList(ul, _type):
""" Concatenate users
Expand Down Expand Up @@ -69,24 +69,21 @@ def initialize(args):
c.Year = args.year
c.Since = args.since
c.Until = args.until
c.Fruit = args.fruit
c.Email = args.email
c.Phone = args.phone
c.Verified = args.verified
c.Store_csv = args.csv
c.Store_json = args.json
c.Show_hashtags = args.hashtags
c.Limit = args.limit
c.Count = args.count
c.Stats = args.stats
c.hostname = args.hostname
c.Database = args.database
c.DB_user = args.DB_user
c.DB_pwd = args.DB_pwd
c.To = args.to
c.All = args.all
c.Essid = args.essid
c.Format = args.format
c.User_full = args.user_full
c.User_info = args.user_info
c.Profile_full = args.profile_full
c.Store_pandas = args.store_pandas
c.Pandas_type = args.pandas_type
Expand Down Expand Up @@ -126,7 +123,8 @@ def options():
ap.add_argument("--year", help="Filter Tweets before specified year.")
ap.add_argument("--since", help="Filter Tweets sent since date (Example: 2017-12-27).")
ap.add_argument("--until", help="Filter Tweets sent until date (Example: 2017-12-27).")
ap.add_argument("--fruit", help="Display 'low-hanging-fruit' Tweets.", action="store_true")
ap.add_argument("--email", help="Filter Tweets that might have email addresses", action="store_true")
ap.add_argument("--phone", help="Filter Tweets that might have phone numbers", action="store_true")
ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).",
action="store_true")
ap.add_argument("--csv", help="Write as .csv file.", action="store_true")
Expand All @@ -138,10 +136,7 @@ def options():
action="store_true")
ap.add_argument("--stats", help="Show number of replies, retweets, and likes.",
action="store_true")
ap.add_argument("--hostname", help="Store the mysql database host")
ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 or mysql database.")
ap.add_argument("--DB_user", help="Store the mysql database user")
ap.add_argument("--DB_pwd", help="Store the mysql database pwd")
ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 database.")
ap.add_argument("--to", help="Search Tweets to a user.")
ap.add_argument("--all", help="Search all Tweets associated with a user.")
ap.add_argument("--followers", help="Scrape a person's followers.", action="store_true")
Expand All @@ -161,7 +156,6 @@ def options():
ap.add_argument("--user-full",
help="Collect all user information (Use with followers or following only).",
action="store_true")
ap.add_argument("--user-info", help="Scrape user's info in tweet", action="store_true")
ap.add_argument("--profile-full",
help="Slow, but effective method of collecting a user's Tweets and RT.",
action="store_true")
Expand Down
9 changes: 0 additions & 9 deletions elasticsearch/index-tweets.json
Expand Up @@ -20,20 +20,11 @@ PUT twinttweets
"day": {"type": "integer"},
"hour": {"type": "integer"},
"link": {"type": "text"},
"gif_url": {"type": "text"},
"gif_thumb": {"type": "text"},
"video_url": {"type": "text"},
"video_thumb": {"type": "text"},
"is_reply_to": {"type": "long"},
"has_parent_tweet": {"type": "long"},
"retweet": {"type": "text"},
"essid": {"type": "keyword"},
"nlikes": {"type": "integer"},
"nreplies": {"type": "integer"},
"nretweets": {"type": "integer"},
"is_quote_status": {"type": "long"},
"quote_id": {"type": "long"},
"quote_id_str": {"type": "text"},
"quote_url": {"type": "text"},
"search": {"type": "text"},
"near": {"type": "text"},
Expand Down
6 changes: 3 additions & 3 deletions twint/config.py
Expand Up @@ -2,7 +2,7 @@ class Config:
Username = None
User_id = None
Search = None
Geo = None
Geo = ""
Location = False
Near = None
Lang = None
Expand All @@ -12,7 +12,8 @@ class Config:
Year = None
Since = None
Until = None
Fruit = False
Email = False
Phone = False
Verified = False
Store_csv = False
Store_json = False
Expand All @@ -33,7 +34,6 @@ class Config:
Favorites = False
TwitterSearch = False
User_full = False
User_info = False
Profile_full = False
Store_object = False
Store_pandas = False
Expand Down
47 changes: 1 addition & 46 deletions twint/output.py
Expand Up @@ -46,7 +46,7 @@ def _output(obj, output, config, **extra):
else:
obj.username = obj.username.lower()
for i in range(len(obj.mentions)):
obj.mentions[i] = obj.mentions[i]["screen_name"].lower()
obj.mentions[i] = obj.mentions[i].lower()
for i in range(len(obj.hashtags)):
obj.hashtags[i] = obj.hashtags[i].lower()
if config.Output != None:
Expand Down Expand Up @@ -87,51 +87,6 @@ async def checkData(tweet, location, config, conn):
if copyright is None and is_tweet(tweet):
tweet = Tweet(tweet, location, config)

if config.Database is not None and config.User_info:
for user in tweet.mentions:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.tags:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.replies:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])

if config.Database is not None and config.User_info:
for user in usernames:
url = f"http://twitter.com/{user}?lang=en"
await get.User(url, config, conn)

if config.User_info:
for user in tweet.mentions:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)
for user in tweet.tags:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)
for user in tweet.replies:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)

if datecheck(tweet.datestamp, config):
output = format.Tweet(config, tweet)

Expand Down

0 comments on commit 8226057

Please sign in to comment.