rename files

dexhunter · Jan 19, 2017 · f2893fe · f2893fe
1 parent 73c049c
commit f2893fe
Show file tree

Hide file tree

Showing 51 changed files with 13,657 additions and 7 deletions.
diff --git a/PrideandPrejudice.txt b/PrideandPrejudice.txt
diff --git a/README.md b/README.md
@@ -98,4 +98,8 @@ Arrr, back to work.
 Low effiency :/ I did 2 different versions for exercise 12-1, 12-2.
 
 ### _14/01/2017_
-Finally finish chapter 12, takes a whole day to do exercise 12-4 and read a lot posts on stackoverflow, think learned a lot 
+Finally finished chapter 12. It took a whole day to do exercise 12-4 and read a lot of posts on Stack Overflow. I tried several methods but found that the author's answer is still the best, so I rewrote the code in my own way (actually the same). A friend told me about another way of solving exercise 12-4 that is interesting (a bottom-up approach); I will definitely try that method later!
+
+For now, let me skip it and chapter 13; I will come back to them next week.
+
+### 
diff --git a/TP reading notes.md b/TP reading notes.md
@@ -109,4 +109,22 @@ _ has 3 main conventional uses in Python:
 * use 'in' to check whether a value is in the list
 
 
+# Chapter 13
+
+# Chapter 14 Files
+
+* persistence: the program runs for a long time (or all the time) and keeps at least some of its data in permanent storage
+* transience: the program runs for a short time, and when it ends its data vanishes as well
+
+### Reading and Writing
+
+* If the file already exists, opening it in write mode clears out the old data and starts
+fresh, so be careful!
+
+* When the first operand is a string, % is the format operator
+* relative path
+* absolute path: starts with '/'
+* dbm is a module in Python 3! 
+* pipe object: an object representing a running program
+* What is pickle? pickle is a module that implements serializing and de-serializing a Python object structure. "Pickling" is the process whereby a Python object hierarchy is converted into a byte stream, and "unpickling" is the inverse operation.
 
diff --git a/anagram_sets.py b/anagram_sets.py
@@ -0,0 +1,101 @@
+"""This module contains a code example related to
+
+Think Python, 2nd Edition
+by Allen Downey
+http://thinkpython2.com
+
+Copyright 2015 Allen Downey
+
+License: http://creativecommons.org/licenses/by/4.0/
+
+This file is used in exercise 14-2
+"""
+
+from __future__ import print_function, division
+
+
+def signature(s):
+    """Returns the signature of this string.
+
+    Signature is a string that contains all of the letters in order.
+
+    s: string
+    """
+    # TODO: rewrite using sorted()
+    t = list(s)
+    t.sort()
+    t = ''.join(t)
+    return t
+
+
+def all_anagrams(filename):
+    """Finds all anagrams in a list of words.
+
+    filename: string filename of the word list
+
+    Returns: a map from each word to a list of its anagrams.
+    """
+    d = {}
+    for line in open(filename):
+        word = line.strip().lower()
+        t = signature(word)
+
+        # TODO: rewrite using defaultdict
+        if t not in d:
+            d[t] = [word]
+        else:
+            d[t].append(word)
+    return d
+
+
+def print_anagram_sets(d):
+    """Prints the anagram sets in d.
+
+    d: map from words to list of their anagrams
+    """
+    for v in d.values():
+        if len(v) > 1:
+            print(len(v), v)
+
+
+def print_anagram_sets_in_order(d):
+    """Prints the anagram sets in d in decreasing order of size.
+
+    d: map from words to list of their anagrams
+    """
+    # make a list of (length, word pairs)
+    t = []
+    for v in d.values():
+        if len(v) > 1:
+            t.append((len(v), v))
+
+    # sort in ascending order of length
+    t.sort()
+
+    # print the sorted list
+    for x in t:
+        print(x)
+
+
+def filter_length(d, n):
+    """Select only the words in d that have n letters.
+
+    d: map from word to list of anagrams
+    n: integer number of letters
+
+    returns: new map from word to list of anagrams
+    """
+    res = {}
+    for word, anagrams in d.items():
+        if len(word) == n:
+            res[word] = anagrams
+    return res
+
+
+if __name__ == '__main__':
+    # group every word in words.txt by signature and print the groups
+    anagram_map = all_anagrams('words.txt')
+    print_anagram_sets_in_order(anagram_map)
+
+    # repeat for the groups whose key has exactly eight letters
+    eight_letters = filter_length(anagram_map, 8)
+    print_anagram_sets_in_order(eight_letters)
+
diff --git a/captions b/captions
diff --git a/data.pkl b/data.pkl
diff --git a/ex3_1.py → ex03_1.py b/ex3_1.py → ex03_1.py
diff --git a/ex3_2.py → ex03_2.py b/ex3_2.py → ex03_2.py
diff --git a/ex3_3.py → ex03_3.py b/ex3_3.py → ex03_3.py
diff --git a/ex4_1.py → ex04_1.py b/ex4_1.py → ex04_1.py
diff --git a/ex4_1_1.py → ex04_1_1.py b/ex4_1_1.py → ex04_1_1.py
diff --git a/ex4_2.py → ex04_2.py b/ex4_2.py → ex04_2.py
diff --git a/ex4_3.py → ex04_3.py b/ex4_3.py → ex04_3.py
diff --git a/ex4_5.py → ex04_5.py b/ex4_5.py → ex04_5.py
diff --git a/ex5_1.py → ex05_1.py b/ex5_1.py → ex05_1.py
diff --git a/ex5_2.py → ex05_2.py b/ex5_2.py → ex05_2.py
diff --git a/ex5_3.py → ex05_3.py b/ex5_3.py → ex05_3.py
diff --git a/ex5_4.py → ex05_4.py b/ex5_4.py → ex05_4.py
diff --git a/ex5_5.py → ex05_5.py b/ex5_5.py → ex05_5.py
diff --git a/ex5_6.py → ex05_6.py b/ex5_6.py → ex05_6.py
diff --git a/ex6_1.py → ex06_1.py b/ex6_1.py → ex06_1.py
diff --git a/ex6_2.py → ex06_2.py b/ex6_2.py → ex06_2.py
diff --git a/ex6_3.py → ex06_3.py b/ex6_3.py → ex06_3.py
diff --git a/ex6_4.py → ex06_4.py b/ex6_4.py → ex06_4.py
diff --git a/ex6_5.py → ex06_5.py b/ex6_5.py → ex06_5.py
diff --git a/ex7_1.py → ex07_1.py b/ex7_1.py → ex07_1.py
diff --git a/ex7_2.py → ex07_2.py b/ex7_2.py → ex07_2.py
diff --git a/ex7_3.py → ex07_3.py b/ex7_3.py → ex07_3.py
diff --git a/ex7_4.py → ex07_4.py b/ex7_4.py → ex07_4.py
diff --git a/ex8_1.py → ex08_1.py b/ex8_1.py → ex08_1.py
diff --git a/ex8_2.py → ex08_2.py b/ex8_2.py → ex08_2.py
diff --git a/ex8_3.py → ex08_3.py b/ex8_3.py → ex08_3.py
diff --git a/ex8_4.py → ex08_4.py b/ex8_4.py → ex08_4.py
diff --git a/ex8_5.py → ex08_5.py b/ex8_5.py → ex08_5.py
diff --git a/ex9_1.py → ex09_1.py b/ex9_1.py → ex09_1.py
diff --git a/ex9_2.py → ex09_2.py b/ex9_2.py → ex09_2.py
diff --git a/ex9_3.py → ex09_3.py b/ex9_3.py → ex09_3.py
diff --git a/ex9_4.py → ex09_4.py b/ex9_4.py → ex09_4.py
diff --git a/ex9_5.py → ex09_5.py b/ex9_5.py → ex09_5.py
diff --git a/ex9_6.py → ex09_6.py b/ex9_6.py → ex09_6.py
diff --git a/ex9_7.py → ex09_7.py b/ex9_7.py → ex09_7.py
diff --git a/ex9_8.py → ex09_8.py b/ex9_8.py → ex09_8.py
diff --git a/ex9_9.py → ex09_9.py b/ex9_9.py → ex09_9.py
diff --git a/ex12_4.py b/ex12_4.py
@@ -7,7 +7,6 @@
 I used others' codes to test but found only when using memo can you improve performance significatnly, otherwise the process is really slow
 '''
 
-
 from collections import defaultdict
 
 memo = {}
@@ -29,7 +28,7 @@ def children(word, d):
 
 	rest = []
 	for x in child(word, d):
-		if children(x, d):
+		if children(x, d): #if list is not empty
 			rest.append(x)
 
 	memo[word] = rest
@@ -43,9 +42,6 @@ def all_children(d):
 			res.append(word)
 	return res
 
-
-
-
 def map_words(filename="words.txt"):
 	d = defaultdict(list)
 	with open(filename) as fin:
@@ -89,4 +85,9 @@ def list_of_words(filename="words.txt"):
 	return l
 '''
 
-print_longest(map_words())
+print_longest(map_words())
+
+
+'''
+Another thought: start from the smallest words, 'a' and 'i', and build upward to get the longest sequence.
+'''
diff --git a/ex13_1.py b/ex13_1.py
@@ -0,0 +1,18 @@
+'''
+According to http://stackoverflow.com/questions/3900054/python-strip-multiple-characters,
+ string.translate(string.maketrans()) is the fastest way to remove undesired chars
+ 
+ maketrans syntax (Python 3): str.maketrans(intab, outtab, deletechars)
+'''
+import string
+
+def read_file(filename):
+	'''Reads a file and return stripped strings
+	
+	filename: a string
+	'''
+	remove_char = string.punctuation + string.whitespace
+	with open(filename) as fin:
+		for line in fin:
+			yield line.translate(line.maketrans("", "", ), remove_char).lower()
+
diff --git a/ex14_1.py b/ex14_1.py
@@ -0,0 +1,19 @@
+from string import maketrans
+
+def sed(s, replace_string, input, output):
+	fin = open(input, 'r')
+	fout = open(output, 'w')
+
+	trantab = maketrans(s, replace_string) # translate table
+
+
+	for word in fin:
+		re_word = word.translate(trantab) 
+		fout.write(re_word)
+
+	fin.close()
+	fout.close()
+
+
+if __name__ == '__main__':
+	sed('p', 'q', 'test.txt', 'output.txt') # replace each 'p' in test.txt with 'q' and write the result to output.txt
diff --git a/ex14_2.py b/ex14_2.py
@@ -0,0 +1,47 @@
+''' pickle is GREAT for this saving specified data structure task.
+'''
+import pickle
+from collections import defaultdict
+
+def load_words(filename = "words.txt"):
+	with open(filename) as f:
+		for word in f:
+			yield word.rstrip() # return a generator
+
+def all_anagram(l):
+	'''Reads a list and return a set of anagram words
+	
+	l: list
+	
+	Returns: set
+	'''
+	d = defaultdict(list) # avoid KeyError in dict()
+	for word in l:
+		signature = "".join(sorted(word)) #sorted: leave the original word untouched
+		d[signature].append(word)
+
+	for k, v in d.items(): #remove d[k] if there is only one value corresponding to the key which means there is no anagram for the word 
+		if len(v) == 1:
+			del d[k]
+
+	return d
+
+def save_dict(d):
+    with open('shelf.pkl', 'wb') as f: #wb for write byte
+        pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
+
+def look_up_dict(word):
+	#look up a word and return its anagram in the 'shelf'
+    with open('shelf.pkl', 'rb') as f: #rb for read byte
+        d = pickle.load(f)
+	for k, v in d.iteritems():
+			for i in v:
+				if i == word:
+					return v
+
+if __name__ == '__main__':
+	books = all_anagram(load_words())
+	save_dict(books)
+	print ''
+	print look_up_dict('tired')
+	print look_up_dict('cosets')
diff --git a/output.txt b/output.txt
@@ -0,0 +1,4 @@
+Aqqle
+Banana
+Cheery
+qitaya
diff --git a/rename.py b/rename.py
@@ -0,0 +1,7 @@
+# change the name to fit Github listing convention
+import os
+
+for file in os.listdir():
+	if file.startswith(("ex3", "ex4", "ex5", "ex6", "ex7", "ex8",  "ex9")):
+		os.rename(file, file.replace("ex", "ex0"))
+
diff --git a/shelf.pkl b/shelf.pkl
diff --git a/test.txt b/test.txt
@@ -0,0 +1,4 @@
+Apple
+Banana
+Cheery
+pitaya
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    Aqqle
+    Banana
+    Cheery
+    qitaya