UML Fp

2 min read
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from graphviz import Digraph
class FPTreeNode:
    """One node of an FP-tree.

    Attributes:
        name: The item this node stands for.
        count: Counter stored on the node; initialised from ``count``.
        parent: The node above this one, or ``None`` when there is none.
        children: Mapping of item -> child ``FPTreeNode``; starts empty.
        link: Next node in the chain of nodes that carry the same item,
            or ``None`` at the end of the chain.
    """

    def __init__(self, name, count: int, parent: "FPTreeNode | None") -> None:
        self.name = name
        self.count = count
        self.parent = parent
        # A fresh dict per instance so siblings never share children.
        self.children = {}
        # For the linked list of nodes holding the same item.
        self.link = None
class FPTree:
    """Prefix tree of transactions with a per-item chain of nodes.

    Attributes:
        root: Sentinel ``FPTreeNode`` named ``"Root"`` with count 0.
        headers: Mapping of item -> the first node created for that item.
            Following ``node.link`` from there visits every node of the item
            in creation order.
    """

    def __init__(self) -> None:
        self.root = FPTreeNode("Root", 0, None)
        self.headers = {}  # stores the first instance of each item
        # item -> last node created for that item, so that extending the
        # chain is a single assignment instead of a walk from the head.
        self._header_tails = {}

    def add_transaction(self, transaction) -> None:
        """Insert one transaction as a path starting at the root.

        Args:
            transaction: Iterable of items, already in the order they should
                appear along the path. An empty transaction changes nothing.

        For each item, the matching child of the current node has its count
        incremented; if there is no such child, a new node with count 1 is
        created and appended to that item's chain.
        """
        current_node = self.root
        for item in transaction:
            child = current_node.children.get(item)
            if child is None:
                # Add a new child node.
                child = FPTreeNode(item, 1, current_node)
                current_node.children[item] = child

                # Maintain the linked list of nodes holding the same item.
                tail = self._header_tails.get(item)
                if tail is None:
                    self.headers[item] = child
                else:
                    tail.link = child
                self._header_tails[item] = child
            else:
                child.count += 1
            current_node = child

    def visualize(self) -> "Digraph":
        """Return a ``Digraph`` with one node per tree node and parent->child edges."""
        dot = Digraph()
        # Declare the root explicitly; otherwise Graphviz creates it implicitly
        # from the first edge and labels it with its raw identifier.
        dot.node(str(id(self.root)), str(self.root.name))
        self._add_nodes(dot, self.root)
        return dot

    def _add_nodes(self, dot, node) -> None:
        """Recursively add every descendant of ``node`` (and its edges) to ``dot``.

        ``node`` itself is not declared here, only referenced as an edge tail.
        """
        # id() is unique per live object, so two nodes with the same item and
        # count can never collapse into a single Graphviz node.
        parent_id = str(id(node))
        for child_node in node.children.values():
            child_id = str(id(child_node))
            dot.node(child_id, f"{child_node.name} ({child_node.count})")
            dot.edge(parent_id, child_id)
            self._add_nodes(dot, child_node)
# Sample transactional dataset: one row per transaction, one 0/1 column per item.
data = {
    'Transaction_ID': [1, 2, 3, 4, 5],
    'Milk': [1, 1, 0, 1, 0],
    'Bread': [1, 0, 1, 1, 1],
    'Butter': [1, 1, 0, 1, 0],
    'Eggs': [0, 1, 1, 0, 1],
    'Cheese': [0, 1, 1, 1, 0],
}

# Mining thresholds, named once so the FP-tree below uses the same cut-off
# as the fpgrowth() call.
MIN_SUPPORT = 0.4
MIN_CONFIDENCE = 0.6

# Convert the dataset to a DataFrame indexed by transaction.
df = pd.DataFrame(data).set_index('Transaction_ID')

# Convert to boolean type (the form passed to fpgrowth below).
df_bool = df.astype(bool)

# Generate frequent itemsets using FP-Growth.
frequent_itemsets = fpgrowth(df_bool, min_support=MIN_SUPPORT, use_colnames=True)

# Generate association rules.
rules = association_rules(
    frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE
)

# Rank items by how many transactions contain them, most frequent first.
# A stable sort keeps tied items in column order, so the tree shape is
# reproducible from run to run.
item_support = df_bool.sum().sort_values(ascending=False, kind="stable")

# Keep only items that reach the support threshold, so the drawn tree is
# built from the same items that fpgrowth() considers frequent.
is_frequent = (item_support / len(df_bool)) >= MIN_SUPPORT
sorted_items = item_support[is_frequent].index.tolist()

# Rewrite every transaction as its items listed in that global order.
sorted_transactions = [
    [item for item in sorted_items if row[item]]
    for _, row in df_bool.iterrows()
]

# Construct FP-Tree.
fp_tree = FPTree()
for transaction in sorted_transactions:
    fp_tree.add_transaction(transaction)

# Render the FP-Tree to FP_Tree.png (cleanup=True removes the intermediate
# DOT source file).
fp_tree_graph = fp_tree.visualize()
fp_tree_graph.render("FP_Tree", format="png", cleanup=True)

# Display frequent itemsets and rules.
print("Frequent Itemsets:")
print(frequent_itemsets)
print("\nAssociation Rules:")
print(rules)

# Show the rendered FP-Tree image. display() is called explicitly because a
# bare Image(...) expression is only shown when it happens to be the last
# expression of a notebook cell.
from IPython.display import Image, display
display(Image(filename="FP_Tree.png"))
0
Subscribe to my newsletter
Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by
