diff --git a/.gitignore b/.gitignore index be0870f..6b8ae09 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules __pycache__/* *.pyc -__pycache__/usernames.cpython-37.pyc \ No newline at end of file +__pycache__/usernames.cpython-37.pyc +usercsv/ \ No newline at end of file diff --git a/README.md b/README.md index 6582a79..90b2064 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,19 @@ -# beautifultwint +# Beautiful Twint + +Beautiful Twint scrapes data for the specified Twitter users. The data is pulled using the Twint module and stored as CSV files. The data in the files will be visualized as per the needs of [Ranchi Mall's](https://twitter.com/ranchimallflo "What is Ranchi Mall?") marketing efforts. + +## Pre-Requisites +The code is written in Python 3 and makes use of the following Python modules: +1. twint +2. pandas +3. Flask + +You can either run `pip3 install twint pandas Flask` or `pip3 install -r requirements.txt` to install them. + +## Running the app +The app performs 2 functions: +1. Fetch Twitter data of the user handles mentioned in the file `usernames.py` and store it as CSV +2. Display -beautifultwint is an app which pulls out twitter information for usersmention in *usernames.py* using Twint module and visualizes them in use cases which are required for Ranchi Mall's marketing efforts. 
Ps - This is a work in progress app as of April 23, 2020 diff --git a/display.py b/display.py new file mode 100644 index 0000000..088b0ef --- /dev/null +++ b/display.py @@ -0,0 +1,24 @@ +from flask import Flask, render_template +import pandas as pd +import os + +app = Flask(__name__) +current_folder_path = os.path.abspath(os.getcwd()) +csv_foldername = 'usercsv' +csv_folderpath = os.path.join(current_folder_path, 'usercsv') + +@app.route('/') +def homepage(): + # show a list of all files + files = os.listdir(os.path.join(current_folder_path, csv_foldername)) + # read user files and display likes and tweet + newdf = pd.DataFrame() + for idx, filename in enumerate(files): + userdata = pd.read_csv(os.path.join(csv_folderpath,filename), usecols=['username','likes_count','tweet']) + newdf = newdf.append(userdata.loc[0]) + newdf = newdf[['username','likes_count','tweet']] + return render_template('index.html', tables=[newdf.to_html(classes='data', index=False)], titles=newdf.columns.values) + + +if __name__=='__main__': + app.run(debug=True) \ No newline at end of file diff --git a/displayuserinfo.py b/displayuserinfo.py deleted file mode 100644 index 17a3c66..0000000 --- a/displayuserinfo.py +++ /dev/null @@ -1,6 +0,0 @@ -from flask import Flask -app = Flask(__name__) - -@app.route('/') -def homepage(): - return 'Welcome to beautiful twint homepage' \ No newline at end of file diff --git a/fetchcsv.py b/fetchcsv.py index 4efe80d..2ba91f6 100644 --- a/fetchcsv.py +++ b/fetchcsv.py @@ -5,15 +5,34 @@ import twint import os from usernames import * +import shutil +import os + +current_folder_path = os.path.abspath(os.getcwd()) +csv_foldername = 'usercsv' def fetchUserdata(username): c = twint.Config() c.Username = username c.Limit = 10 c.Store_csv = True - current_folder_path = os.path.abspath(os.getcwd()) c.Output = os.path.join(current_folder_path, f"{username}.csv") twint.run.Search(c) -for idx, username in enumerate(twitterUsernames): - fetchUserdata(username) \ No 
newline at end of file +def moveCSVfiles(): + # Twint saves all csv files in the same folder, so move all of them to usercsv/ + # todo: Find out why Twint doesn't save to absolute path and remove the need for this step + files = os.listdir(current_folder_path) + destfolder = os.path.join(current_folder_path,csv_foldername) + for f in files: + if (f.endswith('.csv')): + shutil.move(os.path.join(current_folder_path,f), os.path.join(destfolder,f)) + +if __name__=='__main__': + # Fetch csv data and save + for idx, username in enumerate(twitterUsernames): + fetchUserdata(username) + # move csv files to usercsv folder + moveCSVfiles() + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0955295 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pandas +Flask +twint \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..1c6bfd7 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,101 @@ + +
+