Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract style tags and non-dialogue words. #5

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 26 additions & 22 deletions srtparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class SubtitleItem
std::vector<std::string> getNonDialogueWords(); //return string vector of non dialogue words
std::vector<std::string> getStyleTags(); //return string vector of style tags


void setStartTime(long int startTime); //set starting time
void setEndTime(long int endTime); //set ending time
void setText(std::string text); //set subtitle text
Expand Down Expand Up @@ -382,17 +381,12 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue
//stripping HTML tags
if(!keepHTML)
{
/*
* TODO : Before erasing, extract the words.
* std::vector<std::string> getStyleTags();
* int getStyleTagCount() const;
* std::vector<std::string> _styleTag;
* int _styleTagCount;
*/

int countP = 0;
std::string tag;
for(char& c : output) // replacing <...> with ~~~~
{

if(c=='<')
{
countP++;
Expand All @@ -403,34 +397,40 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue
{
if(countP!=0)
{
if(c != '>')
c = '~';

if(c != '>'){
tag += c;
c = '~';
}
else if(c == '>')
{
c = '~';
countP--;
_styleTagCount++;
if(tag[0] == '/'){
tag="";

}
else{
_styleTag.push_back(tag);
tag="";
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

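This if/else block can be simplified to a single if statement: both branches reset `tag` to an empty string, so only the `_styleTag.push_back(tag)` call needs to be conditional (when the tag does not start with '/'), followed by one unconditional reset.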

}
}
}

}

}

//stripping non dialogue data e.g. (applause)

if(!doNotIgnoreNonDialogues)
{
/*
* TODO : Before erasing, extract the words.
* std::vector<std::string> getNonDialogueWords();
* int getNonDialogueCount() const;
* std::vector<std::string> _nonDialogue;
* int _nonDialogueCount;
*/

int countP = 0;
std::string action;
for(char& c : output) // replacing (...) with ~~~~
{

if(c=='(')
{
countP++;
Expand All @@ -441,13 +441,17 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue
{
if(countP!=0)
{
if(c != ')')
if(c != ')'){
action.push_back(c);
c = '~';

}
else if(c == ')')
{
c = '~';
countP--;
_nonDialogueCount++;
_nonDialogue.push_back(action);
action="";
}
}
}
Expand Down Expand Up @@ -654,4 +658,4 @@ inline SubtitleWord::~SubtitleWord(void)
}


#endif //SRTPARSER_H
#endif //SRTPARSER_H