Open Science Research Excellence
@comment{Citation key renamed from the exporter default "(International Science Index):", which contains spaces and parentheses and cannot be parsed or cited by BibTeX.}
@article{noh2016improving,
  title       = {Improving Topic Quality of Scripts by Using Scene Similarity Based Word Co-Occurrence},
  author      = {Noh, Yunseok and Kwak, Chang-Uk and Kim, Sun-Joong and Park, Seong-Bae},
  country     = {Korea, Republic Of},
  institution = {Smart Media Platform Section, {ETRI}, Daejeon},
  abstract    = {Scripts are one of the basic text resources to understand
broadcasting contents. Topic modeling is the method to get the
summary of the broadcasting contents from its scripts. Generally,
scripts represent contents descriptively with directions and speeches,
and provide scene segments that can be seen as semantic units.
Therefore, a script can be topic modeled by treating a scene segment
as a document. Because scene segments consist of speeches mainly,
however, relatively small co-occurrences among words in the scene
segments are observed. This causes inevitably the bad quality of
topics by statistical learning method. To tackle this problem, we
propose a method to improve topic quality with additional word
co-occurrence information obtained using scene similarities. The
main idea of improving topic quality is that the information that
two or more texts are topically related can be useful to learn high
quality of topics. In addition, more accurate topical representations
lead to get information more accurate whether two texts are related
or not. In this paper, we regard two scene segments are related
if their topical similarity is high enough. We also consider that
words are co-occurred if they are in topically related scene segments
together. By iteratively inferring topics and determining semantically
neighborhood scene segments, we draw a topic space represents
broadcasting contents well. In the experiments, we showed the
proposed method generates a higher quality of topics from Korean
drama scripts than the baselines.},
  journal     = {International Journal of Computer, Electrical, Automation, Control and Information Engineering},
  volume      = {10},
  number      = {1},
  year        = {2016},
  pages       = {62--66},
  issn        = {1307-6892},
  publisher   = {World Academy of Science, Engineering and Technology},
  index       = {International Science Index 109, 2016},