-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathext_dir_scanner.py
39 lines (30 loc) · 1.32 KB
/
ext_dir_scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Description: This script recursively scans a directory tree and counts the number of files per file extension
# Author: James Sawyer
# Email: [email protected]
# Website: http://www.jamessawyer.co.uk/
import os
import sys
from collections import Counter

import pandas as pd
from tabulate import tabulate
# Default directory to index; pass a path as the first command-line argument
# to scan a different tree.
directory = "/Users/james/github-archive/repos/"


def count_file_extensions(root_dir):
    """Count the files under *root_dir*, grouped by file extension.

    The tree is walked recursively with ``os.walk`` and every file name is
    tallied under the extension reported by ``os.path.splitext``.

    Args:
        root_dir: Path of the directory tree to scan.

    Returns:
        A dict mapping each extension (leading dot included, ``""`` for names
        without one) to the number of files carrying it. Keys appear in the
        order the extensions were first encountered.
    """
    counts = Counter()
    for _root, _dirs, files in os.walk(root_dir):
        # Only the last suffix counts: "archive.tar.gz" is tallied as ".gz".
        counts.update(os.path.splitext(name)[1] for name in files)
    return dict(counts)


def build_report(file_counts):
    """Turn an extension -> count mapping into a sorted one-column DataFrame.

    Args:
        file_counts: Mapping of file extension to number of files.

    Returns:
        A DataFrame indexed by extension with a single ``Count`` column,
        sorted by ``Count`` in descending order.
    """
    file_counts_df = pd.DataFrame.from_dict(
        file_counts, orient="index", columns=["Count"])
    return file_counts_df.sort_values(by="Count", ascending=False)


def main(argv=None):
    """Scan the target directory and print the extension counts as a table.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, when present, replaces the
            module-level ``directory`` default.
    """
    args = sys.argv[1:] if argv is None else argv
    target = args[0] if args else directory
    # os.walk ignores listing errors by default, so a missing path would
    # otherwise produce an empty table instead of a diagnostic.
    if not os.path.isdir(target):
        sys.exit(f"error: {target!r} is not a directory")
    report = build_report(count_file_extensions(target))
    # Print the entire dataframe using python tabulate
    print(tabulate(report, headers="keys", tablefmt="psql"))


if __name__ == "__main__":
    main()