UML Fp

Invoker
2 min read
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from graphviz import Digraph

class FPTreeNode:
    """One node of an FP-tree.

    Attributes:
        name: The item this node stands for.
        count: Counter stored on the node (the tree increments it each time
            a transaction passes through the node).
        parent: The node one level up, or ``None`` when there is none.
        children: Dict of child nodes; starts out empty.
        link: Next node holding the same item, or ``None`` if this is the
            last one in that chain.
    """

    def __init__(self, name, count, parent):
        # Identity and position in the tree.
        self.name, self.count, self.parent = name, count, parent
        # Fresh containers per instance: no children and no sibling link yet.
        self.link = None
        self.children = dict()

class FPTree:
    """Prefix tree of transactions with a per-item header table.

    Attributes:
        root: Sentinel ``FPTreeNode`` named ``"Root"`` with count 0.
        headers: Maps each item to the first node created for it; later
            nodes for the same item are chained through ``node.link``.
    """

    def __init__(self):
        self.root = FPTreeNode("Root", 0, None)
        self.headers = {}  # item -> first node created for that item
        # item -> most recently created node for that item. Lets a new node
        # be appended to its chain without walking the chain from the head.
        self._tails = {}

    def add_transaction(self, transaction, count=1):
        """Insert one transaction as a path starting at the root.

        Args:
            transaction: Iterable of items, already in the order in which
                they should appear along the path.
            count: Amount added to every node on the path. Defaults to 1,
                i.e. a single occurrence of the transaction.
        """
        current_node = self.root
        for item in transaction:
            child = current_node.children.get(item)
            if child is not None:
                # Shared prefix: just bump the existing node.
                child.count += count
            else:
                child = FPTreeNode(item, count, current_node)
                current_node.children[item] = child
                if item not in self.headers:
                    self.headers[item] = child
                else:
                    # Start from the cached tail; the loop only advances if
                    # the chain was extended behind this object's back.
                    tail = self._tails.get(item, self.headers[item])
                    while tail.link is not None:
                        tail = tail.link
                    tail.link = child
                self._tails[item] = child
            current_node = child

    def visualize(self):
        """Return a graphviz ``Digraph`` describing the whole tree."""
        dot = Digraph()
        self._add_nodes(dot, self.root)
        return dot

    @staticmethod
    def _node_id(node):
        """Return a graph identifier that is unique per node object."""
        # id() is unique among live objects; the default str(node) repr
        # ("<... object at 0x...>") is not meant to be used as a graph name.
        return f"n{id(node)}"

    @staticmethod
    def _node_label(node):
        """Return the text shown for *node*: ``"<name> (<count>)"``."""
        return f"{node.name} ({node.count})"

    def _add_nodes(self, dot, node):
        """Add *node*, all its descendants and the connecting edges to *dot*.

        *node* itself is declared as well, so the root is drawn with a
        proper label instead of a raw identifier. The walk is iterative
        (explicit stack, pre-order, children in insertion order) so the
        depth of the tree is not limited by the recursion limit.
        """
        dot.node(self._node_id(node), self._node_label(node))
        # Stack of (parent, child) pairs; children are pushed reversed so
        # they are popped in their original insertion order.
        pending = [(node, child) for child in reversed(list(node.children.values()))]
        while pending:
            parent, child = pending.pop()
            dot.node(self._node_id(child), self._node_label(child))
            dot.edge(self._node_id(parent), self._node_id(child))
            pending.extend(
                (child, grandchild)
                for grandchild in reversed(list(child.children.values()))
            )

# Sample transactional dataset: one row per transaction, one 0/1 column per item
data = {
    'Transaction_ID': [1,2,3,4,5],
    'Milk': [1,1,0,1,0],
    'Bread': [1,0,1,1,1],
    'Butter': [1,1,0,1,0],
    'Eggs': [0,1,1,0,1],
    'Cheese': [0,1,1,1,0]
}

# Thresholds used for mining; MIN_SUPPORT is also applied when building the
# FP-Tree so that the drawn tree contains the same items the miner considers.
MIN_SUPPORT = 0.4
MIN_CONFIDENCE = 0.6

# Convert the dataset to a DataFrame indexed by transaction
df = pd.DataFrame(data).set_index('Transaction_ID')

# Convert to boolean type
df_bool = df.astype(bool)

# Generate frequent itemsets using FP-Growth
frequent_itemsets = fpgrowth(df_bool, min_support=MIN_SUPPORT, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)

# Rank items by the number of transactions containing them, most frequent
# first. Several items tie on support, so a stable sort is used to keep the
# tie order (and therefore the tree shape) reproducible.
item_support = df_bool.sum().sort_values(ascending=False, kind="mergesort")

# Only items meeting the minimum support belong in the FP-Tree
frequent_support = item_support[item_support / len(df_bool) >= MIN_SUPPORT]
sorted_items = frequent_support.index.tolist()

# Rewrite every transaction as its frequent items in ranked order
sorted_transactions = [
    [item for item in sorted_items if row[item]]
    for _, row in df_bool.iterrows()
]

# Construct FP-Tree
fp_tree = FPTree()
for transaction in sorted_transactions:
    fp_tree.add_transaction(transaction)

# Visualise FP-Tree and write it to FP_Tree.png
fp_tree_graph = fp_tree.visualize()
fp_tree_graph.render("FP_Tree", format="png", cleanup=True)

# Display frequent itemsets and rules
print("Frequent Itemsets:")
print(frequent_itemsets)

print("\nAssociation Rules:")
print(rules)

# Show the rendered image inline when IPython is available. display() is
# called explicitly because a bare Image(...) expression is only rendered
# when it is the last expression of a notebook cell.
try:
    from IPython.display import Image, display
except ImportError:
    print("FP-Tree image saved to FP_Tree.png")
else:
    display(Image(filename="FP_Tree.png"))
0
Subscribe to my newsletter

Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Invoker
Invoker