Dependency(canonical)
A. Third Edition
This is the third edition of my "dependency" and I think it is almost the perfect version of all because it
implements almost all functions you can name---canonical cover, decomposition, lossless-join-check.
"Function dependency" is a very important issue in database theory. And what's more, it is the essence of all.
Because I think it is the key of knowledge: the world is described by relations and relations of relations.
For the function "decomposition", I don't want to export an array of sets of the decomposition. The simple reason
is who would use it this way. So, I simply "cout" them.
E.Further improvement
F.File listing
1. rules.h
2. rules.cpp
3. set.h
4. set.cpp
5. main.cpp (main)
file name: set.h
#ifndef SET_H
#define SET_H
#include <iostream>
#include <bitset>
using namespace std;
const int MaxAttrNumber=20;
class Set
{
//this is a long-pain for me, I have no other way to
//let compiler recognize this "friend" function outside declaration
friend ostream& operator<<(ostream& out, const Set& dummy)
{
for (int i=0; i<dummy.size; i++)
{
if (dummy.theSet.test(i))
{
out<<'1';
}
else
{
out<<'0';
}
}
return out;
}
private:
bitset<MaxAttrNumber> theSet;
int size;
int current;
public:
void setSize(const Set& dummy);
int getSize(){ return size;}
int next(int current) const;
int first() const;
int count() const;
Set intersection(const Set& dummy) const;
Set unionSet(const Set& dummy) const;
Set difference(const Set& dummy) const;
//I am crazy about operator overloading!!!:)
Set operator-(const Set& dummy) const;
Set operator+(const Set& dummy) const;
Set operator*(const Set& dummy) const;
void operator=(const Set& dummy);
bool operator==(const Set& dummy) const;
bool operator!=(const Set& dummy) const;
bool operator>(const Set& dummy) const;
bool operator>=(const Set& dummy) const;
bool operator<(const Set& dummy) const;
bool operator<=(const Set& dummy) const;
void set(int pos);
void forEachSubSet(Set& dummy) const;//must be called before "eachSub()"
bool eachSub(Set& dummy) const;
void set();
void reset(int pos);
void reset();
bool test(int pos) const;
bool isIn(const Set& dummy) const;
void setSize(int theSize) {size=theSize;}
Set(int theSize=10);
};
#endif
file name: set.cpp
#include "Set.h"
bool Set::isIn(const Set& dummy) const
{
for (int i=0; i<size; i++)
{
if (theSet.test(i))
{
if (!dummy.test(i))//here I use Set.test() instead of set.test()
{
return false;
}
}
}
return true;
}
bool Set::test(int pos) const
{
return (pos<size&&theSet.test(pos));
}
//current=-1;//initialize to -1 to prepare for being called
int Set::next(int current) const
{
for (int i=current+1; i<size; i++)//include situation current>=size
{
if (theSet.test(i))
{
return i;
}
}
return -1;//not found
}
bool Set::operator !=(const Set& dummy)const
{
return !(this->operator ==(dummy));
}
bool Set::operator <(const Set& dummy)const
{
return (this->isIn(dummy)&&this->operator !=(dummy));
}
bool Set::operator <=(const Set& dummy)const
{
return isIn(dummy);
}
bool Set::operator >(const Set& dummy)const
{
return !(this->operator <=(dummy));
}
bool Set::operator >=(const Set& dummy)const
{
return !(this->operator <(dummy));
}
bool Set::operator ==(const Set& dummy)const
{
for (int i=0; i<(size>dummy.size?size:dummy.size); i++)
{
if (test(i)^dummy.test(i))
{
return false;
}
}
return true;
}
void Set::setSize(const Set& dummy)
{
size=dummy.size;
}
void Set::operator =(const Set& dummy)
{
size=dummy.size;
for (int i=0; i<size; i++)
{
if (dummy.test(i))
{
theSet.set(i);
}
else
{
theSet.reset(i);
}
}
}
Set::Set(int theSize)
{
size=theSize;
reset();
}
void Set::reset()
{
for (int i=0; i<size; i++)
{
theSet.reset(i);
}
}
void Set::reset(int pos)
{
if (pos<size)
{
theSet.reset(pos);
}
}
void Set::set()
{
theSet.set();
}
void Set::set(int pos)
{
theSet.set(pos);
}
void Set::forEachSubSet(Set& dummy) const
{
dummy.size=size;
dummy.reset();//emptyset
}
bool Set::eachSub(Set& dummy) const
{
int index=first();//starting from very first
while (index!=-1)//not exceeding boundery
{
if (dummy.test(index))
{
dummy.reset(index);
index=next(index);
}
else
{
dummy.set(index);
if (dummy<*this)
{
return true;
}
}
}
return false;
}
int Set::first()const
{
return next(-1);
}
int Set::count()const
{
return theSet.count();
}
Set Set::unionSet(const Set& dummy) const
{
Set result;
result.size=size>dummy.size?size:dummy.size;
for (int i=0; i<result.size; i++)
{
if (test(i)||dummy.test(i))
{
result.set(i);
}
}
return result;//this is a temparory object;
}
Set Set::difference(const Set& dummy) const
{
Set result;
result.size=size>dummy.size?size:dummy.size;
for (int i=0; i<result.size; i++)
{
if (test(i)&&!dummy.test(i))
{
result.set(i);
}
}
return result;
}
Set Set::operator +(const Set& dummy) const
{
return unionSet(dummy);
}
Set Set::operator -(const Set& dummy) const
{
return difference(dummy);
}
Set Set::intersection(const Set& dummy) const
{
Set result;
result.size=size<dummy.size?size:dummy.size;
for (int i=0; i<result.size; i++)
{
if (test(i)&&dummy.test(i))
{
result.set(i);
}
}
return result;
}
Set Set::operator *(const Set& dummy) const
{
return intersection(dummy);
}
file name: rules.h
#include <iostream>
#include <bitset>
#include "set.h"
using namespace std;
//make it simple, the first line must list all variables or attributes
//"relation"(A,B,C...)
const int MaxNameLength=5;
const int MaxRuleNumber=100;
const int MaxKeyNumber=10;
const int MaxComposition=10;
class Rule
{
friend class Rules;
private:
Set lhs;
Set rhs;
public:
Rule();
bool test(int pos, bool isLeft);
void lhsSet(int pos);
void rhsSet(int pos);
void setSize(int theSize);
void set(int pos, bool isLeft);
void setRule(const Set& left, const Set& right);
void operator=(const Rule& dummy);
bool operator==(const Rule& dummy);
};
class Rules
{
private:
Set leftSet;
Set rightSet;
Set attrClosure[MaxAttrNumber];
char attributes[MaxAttrNumber][MaxNameLength+1];
char relationName[MaxNameLength+1];
bool needKey;//this is needed for checklossless
int keyCount;
int attrCount;
int attrFound[MaxAttrNumber];
int numberFound;
int searchByChar(char ch, int step);
void ruleReading(FILE* stream);
Rule rules[MaxRuleNumber];
int ruleNumber;
void displayRule(int index);
void removeRule(int index);
void calcClosure();
void addRule(const Set& left, const Set& right);
int extractNames(FILE* stream, char* delimeters, char endChar='\n');
void closure(Set& attrSet, int maskedRule) const;
bool checkAttr(int index);
void checkRule(int index);
void split();
void combine();
//"this" relation imply "dummy" relation
bool imply(const Rules& dummy) const;
void showMatrix(const Set* matrix, int row);
public:
void canonical();
void decomposition();
Set candidateKey[MaxKeyNumber];
void addRule(const Rule& dummy);
void readFile(const char* fileName);
void display();
void display(const Set& attrSet);
int findKey();
bool checkLossless();
bool eachKey(Set& dummy);
bool operator==(const Rules& dummy) const;
Rules();
};
file name: rules.cpp
#include "Rules.h"
void Rules::decomposition()
{
bool keyFound=false;
Set dummy;
findKey();
canonical();
for (int i=0; i<ruleNumber; i++)
{
dummy=rules[i].lhs+rules[i].rhs;
cout<<"\ndecomposition #"<<i+1<<":";
display(dummy);
cout<<"\ndependency is:";
displayRule(i);
cout<<endl;
for (int j=0; j<keyCount; j++)
{
if (candidateKey[j]<=dummy)
{
keyFound=true;//some decomposition contains some key
}
}
}
if (!keyFound)
{
cout<<"\ndecomposition #"<<ruleNumber+1<<":";
display(candidateKey[0]);
cout<<"\nthe key has no particular dependency\n";
}
needKey=!keyFound;
}
//English can be ambiguous: this function really means:
//"this" relation imply "dummy" relation
bool Rules::imply(const Rules& dummy) const
{
Set left;
//for all dependency in "dummy" relation...
for (int i=0; i<dummy.ruleNumber; i++)
{
left=dummy.rules[i].lhs;
//if the closure of that particular dependency under
//dependency of "this" relation
//cannot contains its rhs in that dependency in "dummy"
//relation, we say...
closure(left, -1);
if (!(dummy.rules[i].rhs<=left))
{
//we say "dummy" relation is not implied by "this" relation
return false;
}
}
//can we?????
return true;
}
//the "secret" name for this function is "equivalent"
bool Rules::operator ==(const Rules& dummy) const
{
return imply(dummy)&&dummy.imply(*this);
}
void Rules::canonical()
{
int i;
bool found;
split();
//cout<<"\nafter split";
//display();
do
{
i=0;
found=false;
while (i<ruleNumber)
{
if (checkAttr(i))
{
found=true;
}
i++;
}
}while (found);
//cout<<"\nafter checkattr";
//display();
for (i=0; i<ruleNumber; i++)
{
checkRule(i);
}
//cout<<"\nafter check rule";
//display();
combine();
}
void Rules::combine()
{
for (int i=0; i<ruleNumber; i++)
{
for (int j=i+1; j<ruleNumber; j++)
{
if (rules[i].lhs==rules[j].lhs)
{
rules[i].rhs=rules[i].rhs+rules[j].rhs;
removeRule(j);
}
}
}
}
void Rules::removeRule(int index)
{
if (index<ruleNumber)
{
ruleNumber--;
for (int i=index; i<ruleNumber; i++)
{
rules[i]=rules[i+1];
}
}
}
void Rules::checkRule(int index)
{
Set dummy;
dummy=rules[index].lhs;
closure(dummy, index);
if (rules[index].rhs<=dummy)
{
removeRule(index);
}
}
bool Rules::checkAttr(int index)
{
Set dummy;
//for the out parameter "dummy", you cannot modify it!
rules[index].lhs.forEachSubSet(dummy);
while (rules[index].lhs.eachSub(dummy))
{
Set old;
//so, you have to use a copy of dummy to calc closure of it
old=dummy;
closure(old, index);
if (rules[index].rhs<=old)
{
rules[index].lhs=dummy;
for (int i=0; i<ruleNumber; i++)
{
if (i!=index)
{
if (rules[i]==rules[index])
{
//found repeat
removeRule(index);
}
}
}
//otherwise do nothing
return true;
}
}
return false;
}
int Rules::findKey()
{
Set theLeft, theRight;
bool isSub;
leftSet.setSize(attrCount);
rightSet.setSize(attrCount);
leftSet.set();//the universal set
rightSet.reset();//the empty set
for (int i=0; i<ruleNumber; i++)
{
rightSet= rightSet+rules[i].rhs;
}
rightSet=leftSet-rightSet;//rightSet is the minimum part of candidate key
leftSet=leftSet-rightSet;//leftSet is the difference of rightSet,should be added to key
keyCount=0;
theRight=rightSet;
theLeft=leftSet;
closure(theRight, -1);
if (theRight.count()==attrCount)//the only key
{
candidateKey[keyCount++]=rightSet;
}
else
{
leftSet.forEachSubSet(theLeft);
while (leftSet.eachSub(theLeft))
{
isSub=false;
theRight=rightSet;
theRight=theRight+theLeft;
for (int i=0; i<keyCount; i++)
{
//display(theRight);
//display(candidateKey[i]);
if (candidateKey[i]<theRight)
{
isSub=true;
break;
}
}
if (isSub)
{
continue;//if subset of any candidate key, no need to test
}
Set temp;
temp=theRight;
closure(temp, -1);
if (temp.count()==attrCount)
{
candidateKey[keyCount++]=theRight;
}
}
}
return keyCount;
}
void Rules::showMatrix(const Set* matrix, int row)
{
for (int j=0; j<attrCount; j++)
{
cout<<"\t"<<attributes[j];
}
cout<<"\n";
for (int i=0; i<row; i++)
{
displayRule(i);
cout<<"\t"<<matrix[i]<<endl;
}
}
//this is a standard algorithm and it makes sure
//that all dependency agree with each other
//or in good English that there is some common agreement
//between all dependency. Is this good enough? I doubt it.
bool Rules::checkLossless()
{
int row=needKey?ruleNumber+1:ruleNumber;
Set* matrix=new Set[row];
for (int i=0; i<row; i++)
{
if (needKey&&i==row-1)
{
matrix[i]=candidateKey[0];
}
else
{
matrix[i]=rules[i].lhs+rules[i].rhs;
}
}
showMatrix(matrix, row);
int index;
bool found;
do
{
index=0;
found=false;
while (index<row)
{
for (int j=0; j<ruleNumber; j++)
{
if (j!=index)
{
if (rules[j].lhs<=matrix[index])
{
if (!(rules[j].rhs<=matrix[index]))
{
matrix[index]=matrix[index]+rules[j].rhs;
found=true;
}
}
}
}
index++;
}
}while(found);
showMatrix(matrix, row);
for (index=0; index<row; index++)
{
if (matrix[index].count()==attrCount)
{
delete []matrix;
return true;
}
}
delete [] matrix;
return false;
}
void Rules::calcClosure()
{
for (int i=0; i<attrCount; i++)
{
attrClosure[i].setSize(attrCount);
attrClosure[i].reset();
attrClosure[i].set(i);
closure(attrClosure[i], -1);
}
}
void Rules::closure(Set& attrSet, int maskedRule) const
{
bool found=false;
int i=0;
do
{
i=0;
found=false;
while (i<ruleNumber)
{
if (i!=maskedRule)//this rule will not be calculated
{
if (rules[i].lhs<=attrSet)//lhs is contained
{
if ((attrSet*rules[i].rhs)!=rules[i].rhs)
{
attrSet=attrSet+rules[i].rhs;
found=true;
}
}
}
i++;
}
}
while (found);
}
void Rules::display(const Set& attrSet)
{
bool first=true;
cout<<"{";
for (int i=0; i<attrCount; i++)
{
if (attrSet.test(i))
{
if (first)
{
first=false;
}
else
{
cout<<",";
}
cout<<attributes[i];
}
}
cout<<"}";
}
void Rules::display()
{
cout<<"\nnow display\n";
cout<<relationName<<"(";
for (int i=0; i<attrCount; i++)
{
cout<<attributes[i];
if (i!=attrCount-1)
{
cout<<",";
}
else
{
cout<<");\n";
}
}
for (i=0; i<ruleNumber; i++)
{
displayRule(i);
}
}
bool Rule::test(int pos, bool isLeft)
{
if (isLeft)
{
return lhs.test(pos);
}
else
{
return rhs.test(pos);
}
}
void Rules::displayRule(int index)
{
for (int i=0; i<attrCount; i++)
{
if (rules[index].test(i, true))
{
cout<<attributes[i];
}
}
cout<<" -> ";
for (i=0; i<attrCount; i++)
{
if (rules[index].test(i, false))
{
cout<<attributes[i];
}
}
cout<<";\n";
}
void Rule::set(int pos, bool isLeft)
{
if (isLeft)
{
lhsSet(pos);
}
else
{
rhsSet(pos);
}
}
void Rule::rhsSet(int pos)
{
rhs.set(pos);
}
void Rule::lhsSet(int pos)
{
lhs.set(pos);
}
Rule::Rule()
{
lhs.reset();
rhs.reset();
}
void Rules::readFile(const char* fileName)
{
FILE* stream;
stream=fopen(fileName, "r");
attrCount=extractNames(stream, "=,()", ';');//ignore the first relation name
ruleReading(stream);
}
void Rules::ruleReading(FILE* stream)
{
char ch;
int step=0;
ruleNumber=0;//initialize
bool isLeft=true;
rules[ruleNumber].setSize(attrCount);//do twice!!
while (!feof(stream))
{
ch=fgetc(stream);
if (ch==';'||ch==',')
{
ruleNumber++;
rules[ruleNumber].setSize(attrCount);//do twice!!
isLeft=true;
}
else
{
if (ch=='-'||ch=='>')
{
isLeft=false;
}
else
{
if (isalpha(ch))
{
searchByChar(ch, step);
if (numberFound!=1)
{
if (step<MaxNameLength)
{
step++;
}
else
{
cout<<"illegal attribute stated!\n";
exit(1);
}
}
else
{
step=0;
rules[ruleNumber].set(attrFound[0], isLeft);
}
}
}
}
}
}
void Rule::setSize(int theSize)
{
lhs.setSize(theSize);
rhs.setSize(theSize);
}
int Rules::searchByChar(char ch, int step)
{
if (step==0)//this is first time, and all attributes are searched
{
numberFound=0;
for (int i=0; i<attrCount; i++)//
{
if (ch==attributes[i][step])
{
attrFound[numberFound]=i;
numberFound++;
}
}
}
else
{
int number=0;//new 'attrFound' re-counting
for (int i=0; i<numberFound; i++)
{
if (ch==attributes[attrFound[i]][step])
{
attrFound[number]=i;
number++;
}
}
numberFound=number;
}
return numberFound;
}
int findChar(char ch, char* str)
{
int index=0;
while (str[index]!='\0')
{
if (str[index]==ch)
{
return index;
}
index++;
}
return -1;
}
//extract names from a line delimetered by various chars, like ',','(',')'....
int Rules::extractNames(FILE* stream, char* delimeters, char endChar)
{
int findChar(char ch, char* str);
char ch;
int nameIndex=0;
char buffer[MaxNameLength+1];
char* ptr=buffer;
while (!feof(stream))
{
ch=getc(stream);
if (ch==endChar)
{
if (ptr!=buffer)
{
*ptr='\0';
strcpy(attributes[nameIndex], buffer);
}
break;
}
if (findChar(ch, delimeters)>0)//deli reached
{
//skip the consecutive deli
if (ptr!=buffer)
{
*ptr='\0';
if (ch=='(')//the very first one
{
strcpy(relationName, buffer);
}
else
{
strcpy(attributes[nameIndex], buffer);
nameIndex++;
}
ptr=buffer;//restart
}
}
else
{
*ptr=ch;
ptr++;
}
}
return nameIndex;
}
Rules::Rules()
{
numberFound=attrCount=0;
}
void Rule::operator =(const Rule& dummy)
{
lhs=dummy.lhs;
rhs=dummy.rhs;
}
void Rules::addRule(const Rule& dummy)
{
rules[ruleNumber++]=dummy;
}
void Rules::addRule(const Set& left, const Set& right)
{
rules[ruleNumber++].setRule(left, right);
}
void Rule::setRule(const Set& left, const Set& right)
{
lhs=left;
rhs=right;
}
void Rules::split()
{
int index;
for (int i=0; i<ruleNumber; i++)
{
if (rules[i].rhs.count()>1)
{
index=rules[i].rhs.first();
index=rules[i].rhs.next(index);//starting from second one
while (index!=-1)
{
Set right;
right.setSize(rules[i].rhs);
right.set(index);
rules[i].rhs.reset(index);//remove from old rule
addRule(rules[i].lhs, right);
index=rules[i].rhs.next(index);
}
}
}
}
bool Rule::operator ==(const Rule& dummy)
{
return (lhs==dummy.lhs&&rhs==dummy.rhs);
}
file name: main.cpp (main)
#include "Rules.h"
#include "set.h"
int main()
{
int number;
Rules R;
R.readFile("d:\\rules.txt");
R.display();
number=R.findKey();
for (int i=0; i<number; i++)
{
R.display(R.candidateKey[i]);
cout<<"\n";
}
R.canonical();
R.decomposition();
R.checkLossless();
return 0;
}
Here goes the result of test:
(My little scanner is power enough. The lossless-check function just looks for a line where all element is "1".)
now display
R(A,B,C,D,E,H);
A -> B;
DE -> A;
BC -> E;
BCD -> A;
ADE -> B;
{A,C,D,H}
{B,C,D,H}
{C,D,E,H}
decomposition #1:{A,B}
dependency is:A -> B;
decomposition #2:{A,D,E}
dependency is:DE -> A;
decomposition #3:{B,C,E}
dependency is:BC -> E;
decomposition #4:{A,C,D,H}
the key has no particular dependency
A B C D E H
A -> B;
110000
DE -> A;
100110
BC -> E;
011010
BCD -> A;
101101
A B C D E H
A -> B;
110000
DE -> A;
110110
BC -> E;
011010
BCD -> A;
111111
Press any key to continue