Skip to content

Commit d94305b

Browse files
committed
Add basic app files and Readme on how to run
0 parents  commit d94305b

File tree

5 files changed

+195
-0
lines changed

5 files changed

+195
-0
lines changed

.gitignore

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
.idea/
10+
.idea*
11+
12+
# Distribution / packaging
13+
.Python
14+
env/
15+
build/
16+
develop-eggs/
17+
dist/
18+
downloads/
19+
eggs/
20+
.eggs/
21+
lib/
22+
lib64/
23+
parts/
24+
sdist/
25+
var/
26+
*.egg-info/
27+
.installed.cfg
28+
*.egg
29+
30+
# Installer logs
31+
pip-log.txt
32+
pip-delete-this-directory.txt
33+
34+
# Unit test / coverage reports
35+
htmlcov/
36+
.tox/
37+
.coverage
38+
.coverage.*
39+
.cache
40+
nosetests.xml
41+
coverage.xml
42+
*,cover
43+
.hypothesis/
44+
45+
# Translations
46+
*.mo
47+
*.pot
48+
49+
# Django stuff:
50+
*.log
51+
52+
# Sphinx documentation
53+
docs/_build/
54+
55+
# PyBuilder
56+
target/
57+
58+
#Ipython Notebook
59+
.ipynb_checkpoints
60+
61+
#config file for pypi
62+
.pypirc
63+
64+
#ignoring scraped data
65+
data/
66+
67+
#project settings
68+
../.idea/

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
## Introduction
2+
3+
Google Play Store App Reviews Downloader
4+
5+
#### To run
6+
``python reviews_fetch.py 'app_id'``

reviews_fetch.py

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import requests
2+
import json
3+
from bs4 import BeautifulSoup
4+
import random
5+
import time
6+
import traceback
7+
import os
8+
import sys
9+
import csv
10+
11+
"""
12+
@author: Ademola Oyewale
13+
saopayne@gmail.com
14+
"""
15+
16+
17+
class PlayReviews:
    """Download Google Play Store reviews for an app and append them to a CSV.

    Reviews are requested page by page from the ``getreviews`` endpoint,
    extracted from the HTML fragment each page carries, and appended to
    ``data/reviews.csv`` relative to the current working directory.
    """

    # Endpoint that serves one page of reviews per POST request.
    REVIEWS_URL = "https://play.google.com/store/getreviews"
    # Seconds to wait on the server; without a timeout a stalled connection
    # would block the scraper forever.
    REQUEST_TIMEOUT = 30
    # Output location, relative to the current working directory.
    DATA_DIR = 'data/'
    CSV_PATH = 'data/reviews.csv'
    CSV_HEADER = ['Title', 'Body:', 'Date', 'Author', 'Rating']

    def get_reviews(self, app_id):
        """Fetch every review page for ``app_id`` and return all parsed rows.

        Each parsed page is also appended to the CSV file as a side effect
        of ``parse_reviews``.

        :param app_id: application id passed to the reviews endpoint.
        :return: list of ``[title, body, date, author, rating]`` rows.
        """
        sort_by_helpfulness = '1'
        reviews = []
        page_num = 0
        is_last_page = False

        while not is_last_page:
            # Pause 1-2 seconds before every third page to throttle requests.
            if page_num % 3 == 0:
                time.sleep(random.random() + 1)
            reviews_html, is_last_page = self.fetch_reviews(
                app_id, page_num, sort_by_helpfulness)
            if reviews_html:
                reviews.extend(self.parse_reviews(reviews_html))
            page_num += 1
            print('Downloading reviews on page:', page_num)
        return reviews

    def fetch_reviews(self, app_id, page_num, sort_by):
        """Request one page of reviews.

        :param app_id: application id to fetch reviews for.
        :param page_num: zero-based page index.
        :param sort_by: value sent as ``reviewSortOrder``.
        :return: ``(reviews_html, is_last_page)``; ``('', True)`` whenever the
            response is empty, malformed, or not valid JSON.
        :raises requests.exceptions.RequestException: on network failure,
            including a timeout after ``REQUEST_TIMEOUT`` seconds.
        """
        querystring = {"authuser": "0"}
        payload = {'reviewType': '0', 'pageNum': page_num, 'id': app_id,
                   'xhr': '1', 'reviewSortOrder': sort_by}
        response = requests.post(self.REVIEWS_URL, data=payload,
                                 params=querystring,
                                 timeout=self.REQUEST_TIMEOUT)
        return self._decode_payload(response.content)

    @staticmethod
    def _decode_payload(content):
        """Turn a raw response body into ``(reviews_html, is_last_page)``."""
        # The first four bytes are skipped before JSON decoding
        # (presumably a non-JSON guard prefix -- confirm against the API).
        if len(content) <= 4:
            return '', True
        try:
            decoded = json.loads(content[4:])
        except ValueError:
            traceback.print_exc()
            print("Fetch prevented by Google")
            return '', True

        # A usable response is a non-empty list whose first entry has
        # exactly four items.
        if not decoded or len(decoded[0]) != 4:
            return '', True
        first = decoded[0]
        # Item 2 carries the HTML; item 1 equal to 1 means more pages follow.
        return first[2], first[1] != 1

    def parse_reviews(self, reviews_html_string):
        """Extract reviews from an HTML fragment and append them to the CSV.

        The title, body, date, author and rating elements are collected as
        parallel lists and combined positionally; if the lists differ in
        length the rows are truncated to the shortest one.

        :param reviews_html_string: HTML fragment holding the reviews.
        :return: list of ``[title, body, date, author, rating]`` rows.
        """
        soup = BeautifulSoup(reviews_html_string, 'html.parser')
        titles = soup.find_all("span", {"class": "review-title"})
        bodies = soup.find_all("div", {"class": "review-body"})
        dates = soup.find_all("span", {"class": "review-date"})
        authors = soup.find_all("span", {"class": "author-name"})
        ratings = soup.find_all("div", {"class": "tiny-star"})

        reviews = [
            [title.text, body.text, date.text, author.text,
             rating['aria-label']]
            for title, body, date, author, rating
            in zip(titles, bodies, dates, authors, ratings)
        ]
        self._append_rows(reviews)
        return reviews

    def _append_rows(self, rows):
        """Append ``rows`` to the CSV, writing the header only for a new file."""
        os.makedirs(self.DATA_DIR, exist_ok=True)
        needs_header = (not os.path.exists(self.CSV_PATH)
                        or os.path.getsize(self.CSV_PATH) == 0)
        # newline='' is what the csv module expects; utf-8 keeps non-ASCII
        # review text from failing on platforms with a narrow default codec.
        with open(self.CSV_PATH, 'a', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            if needs_header:
                writer.writerow(self.CSV_HEADER)
            writer.writerows(rows)

    def fetch_and_write_reviews(self, application_id):
        """Download all reviews for ``application_id`` and write them to the CSV.

        :param application_id: application id to fetch reviews for.
        :return: None.
        """
        self.get_reviews(application_id)
        return
91+
92+
93+
if __name__ == '__main__':
    # The application id is the single required command-line argument.
    # Exit with a usage message rather than silently doing nothing.
    if len(sys.argv) < 2:
        sys.exit("Usage: python reviews_fetch.py 'app_id'")
    app_id = sys.argv[1]
    plr = PlayReviews()
    plr.fetch_and_write_reviews(app_id)

setup.cfg

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[metadata]
# Must match the on-disk file name exactly; case-sensitive file systems
# would not find "Readme.md".
description-file = README.md
3+

setup.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from setuptools import setup
2+
import os
3+
4+
# NOTE(review): presumably works around hard-link failures when building
# inside a Vagrant shared folder -- confirm. The hasattr guard avoids an
# AttributeError where os.link does not exist.
if os.environ.get('USER', '') == 'vagrant' and hasattr(os, 'link'):
    del os.link

setup(
    name='PlayStoreReviewsDownloader',
    version='0.1',
    description='Package to scrape google play store reviews',
    author='Ademola Oyewale',
    author_email='saopayne@gmail.com',
    url='https://github.com/saopayne/PlayStoreReviewsDownloader',
    # The project ships a single top-level module; list it explicitly so it
    # is packaged regardless of setuptools auto-discovery.
    py_modules=['reviews_fetch'],
    keywords=['google play store reviews', 'reviews', 'play store', 'app reviews'],
    classifiers=[],
    install_requires=[
        'requests',
        'beautifulsoup4',
    ],
)

0 commit comments

Comments
 (0)