From b66eb3d909d4622e1073e1325afc7766d7f9ce7a Mon Sep 17 00:00:00 2001 From: Vivek Teega Date: Thu, 23 Apr 2020 21:42:00 +0530 Subject: [PATCH 1/3] Move csv files to different folder because Twint isn't saving to absolute path --- displayuserinfo.py | 2 ++ fetchcsv.py | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/displayuserinfo.py b/displayuserinfo.py index 17a3c66..f38d6da 100644 --- a/displayuserinfo.py +++ b/displayuserinfo.py @@ -1,4 +1,6 @@ from flask import Flask +import pandas + app = Flask(__name__) @app.route('/') diff --git a/fetchcsv.py b/fetchcsv.py index 4efe80d..0e851ae 100644 --- a/fetchcsv.py +++ b/fetchcsv.py @@ -5,15 +5,27 @@ import twint import os from usernames import * +import shutil +import os + +current_folder_path = os.path.abspath(os.getcwd()) def fetchUserdata(username): c = twint.Config() c.Username = username c.Limit = 10 c.Store_csv = True - current_folder_path = os.path.abspath(os.getcwd()) c.Output = os.path.join(current_folder_path, f"{username}.csv") twint.run.Search(c) for idx, username in enumerate(twitterUsernames): - fetchUserdata(username) \ No newline at end of file + fetchUserdata(username) + +# Twint saves all csv files in the same folder, so move all of them to usercsv/ +# todo: Find out why Twint doesn save to absolute path and remove the need for this step +files = os.listdir(current_folder_path) +destfolder = os.path.join(current_folder_path,'usercsv') +for f in files: + if (f.endswith('.csv')): + shutil.move(os.path.join(current_folder_path,f), os.path.join(destfolder,f)) + From f81681bde9a0bf7cd6bc9fd78db600b7b84d6b22 Mon Sep 17 00:00:00 2001 From: Vivek Teega Date: Fri, 24 Apr 2020 02:35:07 +0530 Subject: [PATCH 2/3] First POW --- .gitignore | 3 +- README.md | 18 +++++++- display.py | 24 ++++++++++ displayuserinfo.py | 8 ---- fetchcsv.py | 25 +++++++---- requirements.txt | 3 ++ templates/index.html | 101 +++++++++++++++++++++++++++++++++++++++++++ usernames.py | 88 
+++++++++++++++++++++++++------------ 8 files changed, 223 insertions(+), 47 deletions(-) create mode 100644 display.py delete mode 100644 displayuserinfo.py create mode 100644 requirements.txt create mode 100644 templates/index.html diff --git a/.gitignore b/.gitignore index be0870f..6b8ae09 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules __pycache__/* *.pyc -__pycache__/usernames.cpython-37.pyc \ No newline at end of file +__pycache__/usernames.cpython-37.pyc +usercsv/ \ No newline at end of file diff --git a/README.md b/README.md index 6582a79..90b2064 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,19 @@ -# beautifultwint +# Beautiful Twint + +Beautiful Twint scrapes out data of Twitter users specified. The data is pulled using Twint module and stored as csv files. The data in the files will be visualized as per the needs of [Ranchi Mall's](https://twitter.com/ranchimallflo "What is Ranchi Mall?") marketing efforts. + +## Pre-Requisites +The code is written in python3 and makes use of the following python modules: +1. twint +2. pandas +3. Flask + +You can either run `pip3 install twint pandas Flask` or `pip3 install -r requirements.txt` to install them. + +## Running the app +The app performs 2 functions +1. Fetch Twitter data of userhandles mentioned in the file `usernames.py` & store them as CSV +2. Display -beautifultwint is an app which pulls out twitter information for usersmention in *usernames.py* using Twint module and visualizes them in use cases which are required for Ranchi Mall's marketing efforts. 
Ps - This is a work in progress app as of April 23, 2020 diff --git a/display.py b/display.py new file mode 100644 index 0000000..088b0ef --- /dev/null +++ b/display.py @@ -0,0 +1,24 @@ +from flask import Flask, render_template +import pandas as pd +import os + +app = Flask(__name__) +current_folder_path = os.path.abspath(os.getcwd()) +csv_foldername = 'usercsv' +csv_folderpath = os.path.join(current_folder_path, 'usercsv') + +@app.route('/') +def homepage(): + # show a list of all files + files = os.listdir(os.path.join(current_folder_path, csv_foldername)) + # read user files and display likes and tweet + newdf = pd.DataFrame() + for idx, filename in enumerate(files): + userdata = pd.read_csv(os.path.join(csv_folderpath,filename), usecols=['username','likes_count','tweet']) + newdf = newdf.append(userdata.loc[0]) + newdf = newdf[['username','likes_count','tweet']] + return render_template('index.html', tables=[newdf.to_html(classes='data', index=False)], titles=newdf.columns.values) + + +if __name__=='__main__': + app.run(debug=True) \ No newline at end of file diff --git a/displayuserinfo.py b/displayuserinfo.py deleted file mode 100644 index f38d6da..0000000 --- a/displayuserinfo.py +++ /dev/null @@ -1,8 +0,0 @@ -from flask import Flask -import pandas - -app = Flask(__name__) - -@app.route('/') -def homepage(): - return 'Welcome to beautiful twint homepage' \ No newline at end of file diff --git a/fetchcsv.py b/fetchcsv.py index 0e851ae..2ba91f6 100644 --- a/fetchcsv.py +++ b/fetchcsv.py @@ -9,6 +9,7 @@ import shutil import os current_folder_path = os.path.abspath(os.getcwd()) +csv_foldername = 'usercsv' def fetchUserdata(username): c = twint.Config() @@ -18,14 +19,20 @@ def fetchUserdata(username): c.Output = os.path.join(current_folder_path, f"{username}.csv") twint.run.Search(c) -for idx, username in enumerate(twitterUsernames): - fetchUserdata(username) +def moveCSVfiles(): + # Twint saves all csv files in the same folder, so move all of them to usercsv/ + 
# todo: Find out why Twint doesn save to absolute path and remove the need for this step + files = os.listdir(current_folder_path) + destfolder = os.path.join(current_folder_path,csv_foldername) + for f in files: + if (f.endswith('.csv')): + shutil.move(os.path.join(current_folder_path,f), os.path.join(destfolder,f)) + +if __name__=='__main__': + # Fetch csv data and save + for idx, username in enumerate(twitterUsernames): + fetchUserdata(username) + # move csv files to usercsv folder + moveCSVfiles() -# Twint saves all csv files in the same folder, so move all of them to usercsv/ -# todo: Find out why Twint doesn save to absolute path and remove the need for this step -files = os.listdir(current_folder_path) -destfolder = os.path.join(current_folder_path,'usercsv') -for f in files: - if (f.endswith('.csv')): - shutil.move(os.path.join(current_folder_path,f), os.path.join(destfolder,f)) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0955295 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pandas +Flask +twint \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..1c6bfd7 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,101 @@ + + + Twitter user info + + + + + +

Twitter Users

+ {% for table in tables %} {{titles[loop.index]}} {{ table|safe }} {% endfor + %} + + diff --git a/usernames.py b/usernames.py index 8a53423..ede3ea0 100644 --- a/usernames.py +++ b/usernames.py @@ -1,27 +1,61 @@ -twitterUsernames = ["saketrx", - "vivekteega", - "ranchimallflo", - "zee24taasnews", - "alexmohajer", - "pbns_india", - "wtop", - "live_hindustan", - "kashjackson2018", - "livemint", - "trivworks", - "jagranenglish", - "tommyigoe", - "maevemarsden", - "anncoulter", - "iamjohnales", - "justin_marks_", - "carmenbeat", - "asranomani", - "ralf_stegner", - "iamjohnales", - "iamjohnales", - "ericg1247", - "canoe", - "lastampa", - "kris6news", - "the_hindu"] +twitterUsernames = ["DesiCryptoHodlr", + "VishalHKothari", + "TabassumNaiz", + "Shaanush", + "bitkashyap", + "howdy_akshay", + "D_P_tripathi", + "simplykashif", + "mishralokk", + "blockchainlaw91", + "DaniAdvocate", + "ThatNaimish", + "smtgpt", + "anshuldhir_", + "opinderpreet", + "manasilvora", + "dinscrypto", + "shineparamel", + "amu4biz", + "adinalini", + "BitcoinTarun", + "iMoneshKumar", + "ajayjadhav", + "Suresh81189", + "raopreetam007", + "chandresh1091", + "JChittoda", + "VenkatOnbuzz", + "Vivek4real_", + "aiyadt", + "CryptoAnkush", + "ARReddy4694", + "Ruch_9", + "blahblah1058", + "Aanchal_Thakur1", + "jafrinnnn", + "imvijaygir", + "cryptogiriraj", + "RRZPX", + "drkailaschandr1", + "sachin_id", + "nandubatchu", + "therealkeerthan", + "KunduSourodip", + "syedvajahaath", + "atvanguard, + "manishgrover", + "VaibhavMuchand3", + "hmalviya9", + "CryptoShrikar", + "kg_Cashaa", + "sidsverma", + "darshanbathija", + "AnkittGaur", + "TarushaM", + "Dul_dul", + "AraBalaghi", + "TheMohitMadan", + "vaibhavchellani", + "chetanb_", + "HeyVixon"] From 20121a8a42f82874428368e89f4f5ed8809cf3d1 Mon Sep 17 00:00:00 2001 From: Vivek Teega Date: Fri, 24 Apr 2020 02:43:36 +0530 Subject: [PATCH 3/3] Fix minor typo --- usernames.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usernames.py b/usernames.py 
index ede3ea0..e3773c2 100644 --- a/usernames.py +++ b/usernames.py @@ -43,7 +43,7 @@ twitterUsernames = ["DesiCryptoHodlr", "therealkeerthan", "KunduSourodip", "syedvajahaath", - "atvanguard, + "atvanguard", "manishgrover", "VaibhavMuchand3", "hmalviya9",