Appendix

DTDs

Ontologies
Indexes
<?xml version="1.0" encoding="UTF-8"?>
<!-- Root element: any number of the listed definition elements, in any order. -->
<!ELEMENT ontology (
    use-ontology
  | def-category
  | def-relation
  | def-rename
  | def-inference
  | def-constant
  | def-type
)*>
<!-- id and version are mandatory; the remaining attributes are optional. -->
<!ATTLIST ontology
  id                        CDATA  #REQUIRED
  version                   CDATA  #REQUIRED
  description               CDATA  #IMPLIED
  declarators               CDATA  #IMPLIED
  backward-compatible-with  CDATA  #IMPLIED
>
<!-- Empty element: id, version and prefix are required, url is optional. -->
<!ELEMENT use-ontology EMPTY>
<!ATTLIST use-ontology
  id       CDATA  #REQUIRED
  version  CDATA  #REQUIRED
  prefix   CDATA  #REQUIRED
  url      CDATA  #IMPLIED
>
<!-- A category contains zero or more sense children. -->
<!ELEMENT def-category (sense*)>
<!-- Only name is mandatory; sense defaults to "UNKNOWN" when omitted. -->
<!ATTLIST def-category
  name         CDATA  #REQUIRED
  isa          CDATA  #IMPLIED
  description  CDATA  #IMPLIED
  short        CDATA  #IMPLIED
  sense        CDATA  "UNKNOWN"
>
<!-- A sense is exactly one synset followed by zero or more page entries. -->
<!ELEMENT sense (synset, page*)>
<!-- no, name and convenience are mandatory;
     origin defaults to "WN" and definition to the empty string. -->
<!ATTLIST sense
  no           CDATA  #REQUIRED
  name         CDATA  #REQUIRED
  origin       CDATA  "WN"
  definition   CDATA  ""
  convenience  CDATA  #REQUIRED
>
<!-- synset holds character data only. -->
<!ELEMENT synset (#PCDATA)>
<!-- Empty element; all three attributes are required. -->
<!ELEMENT page          EMPTY>
<!-- The third attribute is named "convenience", the spelling used by the
     sense, index and nindex declarations and by the page elements of the
     sample extract, so that those page elements validate. -->
<!ATTLIST page
        name            CDATA   #REQUIRED
        frequence       CDATA   #REQUIRED
        convenience     CDATA   #REQUIRED>...
<?xml version="1.0" encoding="UTF-8"?>
<!-- An index page is any mix of index and nindex entries, in any order. -->
<!ELEMENT indexpage (index | nindex)*>

<!-- index: character data plus three required attributes. -->
<!ELEMENT index (#PCDATA)>
<!ATTLIST index
  page         CDATA  #REQUIRED
  frequence    CDATA  #REQUIRED
  convenience  CDATA  #REQUIRED
>

<!-- nindex: declared with the same content and attributes as index. -->
<!ELEMENT nindex (#PCDATA)>
<!ATTLIST nindex
  page         CDATA  #REQUIRED
  frequence    CDATA  #REQUIRED
  convenience  CDATA  #REQUIRED
>

Thot's University ontology

<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
<!DOCTYPE ontology SYSTEM "file://localhost/...ontologies/JipOnto.dtd">
<!-- Category hierarchy of the university ontology. Each def-category names
     its parent categories in isa (space-separated when there are several)
     and carries a sense value. -->
<ontology id="university-ont"
          version="3.0"
          description="An ontology for describing universities and the activities that occur at them.">

  <!-- Activities -->
  <def-category name="HumanActivity" sense="1"/>
  <def-category name="Activity" isa="HumanActivity" short="activity" sense="1"/>
  <def-category name="Work" isa="Activity" short="work" sense="1"/>
  <def-category name="Recreation" isa="Activity" short="recreation" sense="1"/>
  <def-category name="Process" isa="Activity" short="process" sense="1"/>
  <def-category name="Course" isa="Work" short="teaching course" sense="1"/>
  <def-category name="Research" isa="Work" short="research work" sense="1"/>

  <!-- Social groups and organizations -->
  <def-category name="SocialGroup" short="social group" sense="1"/>
  <def-category name="Organization" isa="SocialGroup" short="organization" sense="1"/>
  <def-category name="EducationOrganization" isa="Organization" short="education organization" sense="0"/>
  <def-category name="Department" isa="EducationOrganization" short="university department" sense="1"/>
  <def-category name="Institute" isa="EducationOrganization" short="institute" sense="1"/>
  <def-category name="School" isa="EducationOrganization" short="school" sense="1"/>
  <def-category name="ResearchGroup" isa="EducationOrganization" short="research group" sense="0"/>
  <def-category name="University" isa="EducationOrganization" short="university" sense="3"/>

  <!-- Entities: organisms and persons -->
  <def-category name="Entity" short="entity" sense="1"/>
  <def-category name="Organism" isa="Entity" short="organism" sense="1"/>
  <def-category name="Person" isa="Organism" short="person" sense="1"/>

  <!-- Employees, faculty and staff -->
  <def-category name="Employee" isa="Person" short="employee" sense="1"/>
  <def-category name="FacultyMember" isa="Employee" short="faculty member" sense="1"/>
  <def-category name="Educator" isa="Person" sense="1"/>
  <def-category name="Professor" isa="FacultyMember Educator" short="professor" sense="1"/>
  <def-category name="AssistantProfessor" isa="Professor" short="assistant professor" sense="1"/>
  <def-category name="AssociateProfessor" isa="Professor" short="associate professor" sense="1"/>
  <def-category name="FullProfessor" isa="Professor" short="full professor" sense="1"/>
  <def-category name="VisitingProfessor" isa="Professor" short="visiting professor" sense="1"/>
  <def-category name="Lecturer" isa="FacultyMember Educator" short="lecturer" sense="1"/>
  <def-category name="PostDoc" isa="FacultyMember" short="post-doctorate" sense="0"/>
  <def-category name="Assistant" isa="Employee" short="assistant" sense="1"/>
  <def-category name="ResearchAssistant" isa="Assistant" short="university research assistant" sense="0"/>
  <def-category name="TeachingAssistant" isa="Assistant Educator" short="university teaching assistant" sense="0"/>
  <def-category name="Leader" isa="Person" sense="1"/>
  <def-category name="Chief" isa="Leader" sense="1"/>
  <def-category name="AdministrativeStaff" isa="Employee" short="administrative staff worker" sense="0"/>
  <def-category name="Director" isa="AdministrativeStaff Chief" short="director" sense="1"/>
  <def-category name="Chair" isa="AdministrativeStaff Professor Leader" short="chair" sense="3"/>
  <def-category name="Dean" isa="AdministrativeStaff Professor Chief" short="dean" sense="1"/>
  <def-category name="ClericalStaff" isa="AdministrativeStaff" short="clerical staff worker" sense="0"/>
  <def-category name="SystemsStaff" isa="AdministrativeStaff" short="systems staff worker" sense="0"/>

  <!-- Learners and students -->
  <def-category name="Learner" isa="Person" sense="1"/>
  <def-category name="Student" isa="Learner" short="student" sense="1"/>
  <def-category name="UndergraduateStudent" isa="Student" short="undergraduate student" sense="0"/>
  <def-category name="GraduateStudent" isa="Student" short="graduate student" sense="1"/>

  <!-- Artefacts and publications
       (Publication also refers to Communication, which is declared further down) -->
  <def-category name="Artefact" isa="Entity" sense="1"/>
  <def-category name="Creation" isa="Artefact" sense="2"/>
  <def-category name="Publication" isa="Creation Communication" sense="1"/>
  <def-category name="Article" isa="Creation" sense="1"/>
  <def-category name="Book" isa="Publication" sense="1"/>
  <def-category name="BookArticle" isa="Article" sense="0"/>
  <def-category name="ConferencePaper" isa="Article" sense="0"/>
  <def-category name="Thesis" isa="Publication" sense="2"/>
  <def-category name="DoctoralThesis" isa="Thesis" short="phd thesis" sense="0"/>
  <def-category name="Periodical" isa="Publication" sense="1"/>
  <def-category name="Journal" isa="Periodical" sense="2"/>
  <def-category name="JournalArticle" isa="Article" sense="0"/>
  <def-category name="Magazine" isa="Periodical" sense="2"/>
  <def-category name="MastersThesis" isa="Thesis" sense="0"/>
  <def-category name="Proceedings" isa="Publication" sense="2"/>
  <def-category name="WorkshopPaper" isa="Article" sense="0"/>

  <!-- Locations -->
  <def-category name="Location" isa="Entity" sense="1"/>

  <!-- Social relations and communication -->
  <def-category name="SocialRelation" short="social relation" sense="1"/>
  <def-category name="Communication" isa="SocialRelation" sense="2"/>
  <def-category name="Information" isa="Communication" sense="1"/>
  <def-category name="Program" isa="Information" short="program" sense="4"/>
  <def-category name="Schedule" isa="Communication" short="schedule" sense="2"/>

  <!-- Meetings -->
  <def-category name="Meeting" isa="SocialGroup" sense="1"/>
  <def-category name="Conference" isa="Meeting" short="conference" sense="1"/>...
</ontology>

Extract of an indexation process

Site: http://www.cs.washington.edu/
Frequency threshold: 0.5
Ontology: fully disambiguated Thoth's University ontology

<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
<!DOCTYPE ontology SYSTEM "file://localhost/home/.../ontologies/JipOnto.dtd">
<!-- Extract of the university ontology in which each def-category carries a
     sense child: one synset followed by zero or more page entries.
     In every category shown, the category's sense attribute equals the no
     attribute of its sense child and the #n suffix of the first synset term.
     All page entries shown have frequence >= 0.5. -->
<ontology id="university-ont" version="3.0" description="">
  <def-category name="Lecturer" short="lecturer" description="" sense="1" isa="FacultyMember Educator">
    <sense name="Lecturer" no="1" origin="wn" convenience="1.0">
      <synset>lector#1,lecturer#1,reader#4</synset>
      <page name="http://www.cs.washington.edu/news/2000DLS.html" frequence="0.75" convenience="0.1"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/dls/" frequence="1.0" convenience="0.01"/>
      <page name="http://www.cs.washington.edu/htbin-post/mvis/mvis/TVtalks" frequence="0.5" convenience="0.1"/>
      <page name="http://www.cs.washington.edu/homes/dickey/grad-brochure-blurb.htm" frequence="0.5" convenience="0.5"/>
      <page name="http://www.cs.washington.edu/news/1999DLS.html" frequence="0.75" convenience="0.01"/>
    </sense>
  </def-category>
  <def-category name="VisitingProfessor" short="visiting professor" description="" sense="1" isa="Professor">
    <sense name="VisitingProfessor" no="1" origin="wn" convenience="1.0">
      <synset>visiting professor#1</synset>
      <!-- No page entries are listed for this sense. -->
    </sense>
  </def-category>
  <def-category name="Employee" short="employee" description="" sense="1" isa="Person">
    <sense name="Employee" no="1" origin="wn" convenience="1.0">
      <synset>employee#1</synset>
      <page name="http://www.cs.washington.edu/homes/lazowska/chair/telecommuting.html" frequence="1.0" convenience="1.0"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/ott/CSE_affiliate_edl.htm" frequence="0.53" convenience="1.0"/>
    </sense>
  </def-category>
  <def-category name="Chair" short="chair" description="" sense="3" isa="Leader AdministrativeStaff Professor">
    <sense name="Chair" no="3" origin="wn" convenience="1.0">
      <synset>chair#3,chairman#1,chairperson#1,chairwoman#1,president#4</synset>
      <page name="http://www.cs.washington.edu/homes/lazowska/cra/case/" frequence="0.57" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/lab/quotes.html" frequence="0.6" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/people/staff/people_who_can_help.html" frequence="0.5" convenience="0.5"/>
      <page name="http://www.cs.washington.edu/ARL/committee.html" frequence="0.6" convenience="0.95"/>
      <page name="http://www.cs.washington.edu/info/contact/" frequence="1.0" convenience="0.3"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/hightech/ht/index.html" frequence="0.5" convenience="0.3"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/" frequence="0.63" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/lazowska.html" frequence="0.66" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/people/faculty/tanimoto.html" frequence="0.9" convenience="0.95"/>
      <page name="http://www.cs.washington.edu/people/faculty/lazowska/" frequence="0.63" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/ip/" frequence="0.5" convenience="0.1"/>
      <page name="http://www.cs.washington.edu/people/acm/people/" frequence="0.5" convenience="0.3"/>
      <page name="http://www.cs.washington.edu/leadership/sld002.htm" frequence="1.0" convenience="0.1"/>
      <page name="http://www.cs.washington.edu/people/faculty/young.html" frequence="0.7" convenience="0.9"/>
      <page name="http://www.cs.washington.edu/homes/lazowska/hightech/ht/tsld001.htm" frequence="0.5" convenience="0.3"/>
    </sense>
  </def-category>...
</ontology>