Merge pull request #15 from yogeshwaran01/v_4.3-dev

version 4.3 update
yogeshwaran01 · Apr 6, 2021 · 01db865 · 01db865
2 parents 0ccd3dc + 2a66994
commit 01db865
Show file tree

Hide file tree

Showing 14 changed files with 645 additions and 381 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -26,10 +26,13 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
+        python -m pip install flake8
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 -v
+    - name: Test with unittest
+      run: |
+        python -m unittest
diff --git a/.gitignore b/.gitignore
@@ -10,5 +10,4 @@ instagramy/core/__pycache__
 .instagramy_cache
 instagramy/plugins/__pycache__
 .old
-methos.md
 instagramy/plugins/__pycache__
diff --git a/README.md b/README.md
@@ -23,8 +23,9 @@
     <img alt="Code style" src="https://img.shields.io/badge/codestyle-Black-blue"/>
     </a>
     <img alt="GitHub Repo size" src="https://img.shields.io/github/repo-size/yogeshwaran01/instagramy"/>
-    <img alt="Actions" src="https://github.com/yogeshwaran01/instagramy/workflows/Python%20package/badge.svg"/>
-    <img alt="Actions" src="https://github.com/yogeshwaran01/instagramy/workflows/Upload%20Python%20Package/badge.svg"/>
+    <a href="https://github.com/yogeshwaran01/instagramy/actions/workflows/python-publish.yml"><img alt="GitHub Actions" src="https://github.com/yogeshwaran01/instagramy/workflows/Upload%20Python%20Package/badge.svg"></a>
+    <a href="https://github.com/yogeshwaran01/instagramy/actions/workflows/python-package.yml"><img alt="GitHub Actions" src="https://github.com/yogeshwaran01/instagramy/workflows/Python%20package/badge.svg"></a>
+
 </p>
 
 </hr>
@@ -42,6 +43,7 @@ Scrape Instagram Users Information, Posts Details, and Hashtags details. This Pa
 - Download [Instagram post](#Plugins-for-Downloading-Posts) and [User profile picture](#Plugins-for-Downloading-Posts)
 - Have some [plugins](#Plugins) for Data analysis
 - No External dependencies
+- Has a [caching feature](#Caching-Feature)
 - Lightweight
 - Easy to Use
 
@@ -81,7 +83,7 @@ For Login into Instagram via instagramy session id is required. No username or p
 
 **Note:** Check for session id frequently, It may be changed by Instagram
 
-<img src="./samples/sessionid.gif" width=100% height=100%>
+<img src="https://raw.githubusercontent.com/yogeshwaran01/instagramy/master/samples/sessionid.gif" width=100% height=100%>
 
 ### Instagram User details
 
@@ -103,6 +105,19 @@ True
 >>> user.user_data # More data about user as dict
 ```
 
+Once you have fetched a user's data, instagramy stores it in a cache file to help avoid errors on later requests. You can also load the data from that cache; in that case, do not provide the sessionid.
+
+```python
+>>> from instagramy import InstagramUser
+
+>>> user = InstagramUser('google', from_cache=True)
+
+>>> user.is_verified
+True
+```
+
+This option is available for all classes: `InstagramUser`, `InstagramHashTag` and `InstagramPost`.
+
 <details><summary>Show all Properties</summary>
 <p>
 
@@ -206,6 +221,7 @@ Class `InstagramPost` scrape some of the information related to the particular p
 - number_of_comments
 - number_of_likes
 - post_source
+- text
 - type_of_post
 - upload_time
 
@@ -281,6 +297,21 @@ You can use this package without login. Sessionid is not required but it may ris
 >>> tag.tag_data
 ```
 
+### Caching Feature
+
+From version `4.3`, instagramy caches the data it fetches. Once you have fetched a user's data, instagramy stores it in a JSON cache file to help avoid errors on later requests. You can also load the data from that cache, and no sessionid is needed: instead of the sessionid, pass the optional parameter `from_cache=True`.
+
+```python
+>>> from instagramy import InstagramUser
+
+>>> user = InstagramUser('google', from_cache=True)
+
+>>> user.is_verified
+True
+```
+
+This option is available for all classes: `InstagramUser`, `InstagramHashTag` and `InstagramPost`.
+
 ## Sample Scripts
 
 You can get some Sample scripts [Here](https://yogeshwaran01.herokuapp.com/post/Instagramy-Python-Package-for-Instagram)

diff --git a/instagramy/InstagramHashTag.py b/instagramy/InstagramHashTag.py
@@ -9,50 +9,71 @@
     -------------
     ::
 
-        from instagramy.InstagramHashtag import InstagramHashtag
+        >>> from instagramy.InstagramHashtag import InstagramHashtag
 
         >>> tag = InstagramHashtag('python')
         >>> tag.number_of_posts
         >>> tag.top_posts
 
 """
-from datetime import datetime
-from collections import namedtuple
 
 from .core.parser import Parser
 from .core.parser import Viewer
+from .core.parser import TagParser
 from .core.exceptions import HashTagNotFound
 from .core.exceptions import RedirectionError
 from .core.exceptions import HTTPError
+from .core.cache import Cache
 from .core.requests import get
 
 
-class InstagramHashTag:
-    """
-    Class InstagramHashTag scrapes instagram hashtag information
+class InstagramHashTag(TagParser):
+    r"""
+    Scrapes instagram hashtag information
+
+    :param tag: Name of the Instagram Hashtag
+    :param sessionid (optional): Session id of Instagram which is in browser cookies
+    :param from_cache (optional): Get data from the cache of instagramy not from instagram
+
     >>> hashtag = InstagramHashTag("python")
     >>> hashtag.number_of_posts
     3119668
     >>> instagram_user.posts_display_urls
     """
 
-    def __init__(self, tag: str, sessionid=None):
+    def __init__(self, tag: str, sessionid=None, from_cache=False):
         self.url = f"https://www.instagram.com/explore/tags/{tag}/"
         self.sessionid = sessionid
-        data = self.get_json()
-        try:
-            self.tag_data = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
-        except KeyError:
-            raise RedirectionError
+        cache = Cache("tag")
+        if from_cache:
+            if cache.is_exists(tag):
+                self.tag_data = cache.read_cache(tag)
+            else:
+                data = self.get_json()
+                cache.make_cache(
+                    tag, data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
+                )
+                self.tag_data = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
+        else:
+            data = self.get_json()
+            cache.make_cache(
+                tag, data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
+            )
+            try:
+                self.tag_data = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
+            except KeyError:
+                raise RedirectionError
         if sessionid:
-            self.viewer = Viewer(data=data["config"]["viewer"])
+            try:
+                self.viewer = Viewer(data=data["config"]["viewer"])
+            except UnboundLocalError:
+                self.viewer = None
         else:
             self.viewer = None
 
     def get_json(self) -> dict:
-        """
-        Return a dict of Hashtag information
-        """
+        """ Get Hashtag information from Instagram """
+
         try:
             html = get(self.url, sessionid=self.sessionid)
         except HTTPError:
@@ -61,78 +82,6 @@ def get_json(self) -> dict:
         parser.feed(html)
         return parser.Data
 
-    @property
-    def tagname(self) -> str:
-        """ Tagname of the Hagtag """
-        return self.tag_data["name"]
-
-    @property
-    def profile_pic_url(self) -> str:
-        """ Profile picture url of the Hagtag """
-        return self.tag_data["profile_pic_url"]
-
-    @property
-    def number_of_posts(self) -> int:
-        """ No.of posts in given Hashtag """
-        return self.tag_data["edge_hashtag_to_media"]["count"]
-
-    @property
-    def top_posts(self) -> list:
-        """
-        Top post data (<70) in the given Hashtag
-        """
-
-        post_lists = []
-        nodes = self.tag_data["edge_hashtag_to_media"]["edges"]
-        for node in nodes:
-            data = {}
-            try:
-                data["likes"] = node["node"]["edge_liked_by"]["count"]
-            except (KeyError, TypeError):
-                data["likes"] = None
-            try:
-                data["comments"] = node["node"]["edge_media_to_comment"]["count"]
-            except (KeyError, TypeError):
-                data["comments"] = None
-            try:
-                data["is_video"] = node["node"]["is_video"]
-            except (KeyError, TypeError):
-                data["is_video"] = None
-            try:
-                data["upload_time"] = datetime.fromtimestamp(
-                    node["node"]["taken_at_timestamp"]
-                )
-            except (KeyError, TypeError):
-                data["upload_time"] = None
-            try:
-                data["caption"] = node["node"]["accessibility_caption"]
-            except (KeyError, TypeError):
-                data["caption"] = None
-            try:
-                data["shortcode"] = node["node"]["shortcode"]
-            except (KeyError, TypeError):
-                data["shortcode"] = None
-            try:
-                data[
-                    "post_url"
-                ] = f'https://www.instagram.com/p/{node["node"]["shortcode"]}'
-            except (KeyError, TypeError):
-                data["post_url"] = None
-            try:
-                data["display_url"] = node["node"]["display_url"]
-            except (KeyError, TypeError):
-                data["display_url"] = None
-            nt = namedtuple("Post", data.keys())(*data.values())
-            post_lists.append(nt)
-        return post_lists
-
-    @property
-    def posts_display_urls(self) -> list:
-        """
-        Top post (<70) in the given Hashtag
-        """
-        return [i["display_url"] for i in self.top_posts]
-
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}('{self.tagname}')"