A Real-Time Speech Translator
Ajay Kumar Barun
I'm excited to share my latest project: a Speech Translator Application that leverages Tkinter and Azure Cognitive Services for real-time translation. This application aims to break down language barriers by providing instant translations and speech synthesis in multiple languages, including Hindi, English, Tamil, and Telugu.
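Key Features:
- Continuous speech recognition from the default microphone, with the recognized text shown in the window.
- Translation into Hindi (hi), English (en), Tamil (ta), and Telugu (te), with a checkbox to switch each language on or off.
- Speech synthesis of the translations using Azure neural voices, with "Speak" and "Play Latest" buttons for each language.
- A scrollable Tkinter interface with a single Start/Stop Listening button.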
This project is a testament to the powerful capabilities of Azure Cognitive Services and demonstrates how we can leverage cloud-based AI to create innovative solutions that enhance communication and inclusivity.
Here's a breakdown of the sample code into steps, explaining each part:
Step 1: Import Necessary Libraries
import tkinter as tk
from tkinter import messagebox
import azure.cognitiveservices.speech as speechsdk
import threading
Step 2: Initialize the Main Application Class
class SpeechTranslatorApp:
    """Tkinter front end for speech translation with the Azure Speech SDK."""

    def __init__(self, root):
        """Store the Tk root and build the complete application.

        Args:
            root: The ``tk.Tk`` window that hosts the application.
        """
        self.root = root
        self.root.title("Speech Translator")
        # True while listening is switched on; flipped by toggle_listening().
        self.is_listening = False

        # Without these calls the window stays empty and no recognizer exists.
        # Order matters: setup_ui() creates self.main_frame, which every
        # create_*() section uses as its parent, and
        # create_translation_display() defines self.target_languages, which
        # setup_translation_service() reads.
        self.create_menu()
        self.setup_ui()
        self.create_header()
        self.create_controls()
        self.create_language_selection()
        self.create_output_display()
        self.create_translation_display()
        self.setup_translation_service()
Step 3: Create the Menu
def create_menu(self):
    """Build the menu bar with a "File" menu holding an "Exit" entry."""
    menu_bar = tk.Menu(self.root)
    file_menu = tk.Menu(menu_bar, tearoff=0)
    menu_bar.add_cascade(label="File", menu=file_menu)
    file_menu.add_command(label="Exit", command=self.exit_program)
    # A Menu is only displayed once it is installed as the window's menu.
    self.root.config(menu=menu_bar)
Create a menu bar and add a "File" menu with an "Exit" option.
Step 4: Exit Program Method
def exit_program(self):
    """Quit the application (command of the File > Exit menu entry)."""
    self.root.quit()
Define a method to quit the application.
Step 5: Setup the User Interface (UI)
def setup_ui(self):
    """Create the scrollable canvas and the frame that holds all sections.

    Creates ``self.canvas``, ``self.scrollbar`` and ``self.main_frame``.
    ``self.main_frame`` is embedded in the canvas so that its content can be
    scrolled vertically.
    """
    # Use self.root rather than the module-level ``root`` so the class also
    # works when it is instantiated from somewhere other than this script.
    self.canvas = tk.Canvas(self.root, bg="#2c3e50")
    self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    self.scrollbar = tk.Scrollbar(self.root, command=self.canvas.yview)
    self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    # Link the other direction as well, so the scrollbar thumb follows the
    # canvas view.
    self.canvas.configure(yscrollcommand=self.scrollbar.set)
    self.main_frame = tk.Frame(self.canvas, bg="#2c3e50")
    self.canvas.create_window((0, 0), window=self.main_frame, anchor="nw")
    # on_frame_configure runs whenever the frame's size changes.
    self.main_frame.bind("<Configure>", self.on_frame_configure)
Setup the main UI components, including a canvas for scrolling, frames for layout, and methods for creating different sections of the UI.
Step 6: Create Header
def create_header(self):
    """Create the header section with the application title label."""
    self.header_frame = tk.Frame(self.main_frame, bg="#34495e", pady=10)
    # Widgets stay invisible until a geometry manager places them.
    self.header_frame.pack(fill=tk.X)
    self.title_label = tk.Label(
        self.header_frame,
        text="Speech Translator",
        font=("Helvetica", 16, "bold"),
        fg="white",
        bg="#34495e",
    )
    self.title_label.pack()
Create a header section with a title label.
Step 7: Create Controls
def create_controls(self):
    """Create the control section with the start/stop listening button."""
    self.control_frame = tk.Frame(self.main_frame, bg="#2c3e50", pady=20)
    # Widgets stay invisible until a geometry manager places them.
    self.control_frame.pack(fill=tk.X)
    self.start_button = tk.Button(
        self.control_frame,
        text="Start Listening",
        font=("Helvetica", 12),
        bg="#1abc9c",
        fg="white",
        command=self.toggle_listening,
    )
    self.start_button.pack()
Create a control section with a start/stop listening button.
Step 8: Create Language Selection
def create_language_selection(self):
    """Create one checkbox per language code, all initially ticked.

    Sets ``self.language_frame`` and ``self.languages``, a dict mapping each
    language code to the ``tk.BooleanVar`` behind its checkbox.
    """
    container = tk.Frame(self.main_frame, bg="#2c3e50", pady=10)
    self.language_frame = container
    container.pack(fill=tk.X, padx=20)

    codes = ("hi", "en", "ta", "te")
    self.languages = {code: tk.BooleanVar(value=True) for code in codes}

    # Appearance shared by every checkbox.
    checkbox_style = {
        "font": ("Helvetica", 12),
        "fg": "white",
        "bg": "#2c3e50",
        "selectcolor": "#2c3e50",
        "activebackground": "#2c3e50",
        "activeforeground": "white",
    }
    for code, enabled in self.languages.items():
        box = tk.Checkbutton(container, text=code, variable=enabled, **checkbox_style)
        box.pack(side=tk.LEFT, padx=10)
Create a section for selecting languages using checkboxes.
Step 9: Create Output Display
def create_output_display(self):
    """Create the section that shows the recognized speech.

    Sets ``self.output_frame``, ``self.output_label``,
    ``self.output_text_frame``, ``self.output_scrollbar`` and
    ``self.output_text``.
    """
    self.output_frame = tk.Frame(self.main_frame, bg="#2c3e50", pady=10)
    self.output_frame.pack(fill=tk.BOTH, expand=True, padx=20)
    self.output_label = tk.Label(
        self.output_frame,
        text="Recognized Speech:",
        font=("Helvetica", 14),
        fg="white",
        bg="#2c3e50",
    )
    # The label is invisible unless it is packed.
    self.output_label.pack(anchor="w", pady=2)
    self.output_text_frame = tk.Frame(self.output_frame)
    self.output_text_frame.pack(fill=tk.BOTH, expand=True)
    self.output_scrollbar = tk.Scrollbar(self.output_text_frame)
    self.output_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    self.output_text = tk.Text(
        self.output_text_frame,
        height=10,
        width=50,
        yscrollcommand=self.output_scrollbar.set,
        font=("Helvetica", 12),
        wrap=tk.WORD,
        bg="#ecf0f1",
    )
    self.output_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    # yscrollcommand only moves the thumb; the scrollbar needs a command to
    # actually scroll the text when it is dragged.
    self.output_scrollbar.config(command=self.output_text.yview)
Create a section to display the recognized speech.
Step 10: Create Translation Display
def create_translation_display(self):
    """Create one translation pane per target language.

    Each pane has a caption, a scrollable text box and two buttons:
    "Speak" calls ``speak_translation`` and "Play Latest" calls
    ``play_latest_translation`` for that language.

    Sets ``self.translated_frame``, ``self.target_languages`` and
    ``self.translated_frames``, a dict mapping each language code to
    ``{"text_box": <tk.Text>, "latest_translation": <str>}``.
    """
    self.translated_frame = tk.Frame(self.main_frame, bg="#2c3e50", pady=10)
    self.translated_frame.pack(fill=tk.BOTH, expand=True, padx=20)
    self.translated_frames = {}
    self.target_languages = ["hi", "en", "ta", "te"]
    for lang in self.target_languages:
        frame = tk.Frame(self.translated_frame, bg="#2c3e50", pady=5)
        frame.pack(fill=tk.BOTH, expand=True)
        label = tk.Label(
            frame,
            text=f"Translated into {lang}:",
            font=("Helvetica", 14),
            fg="white",
            bg="#2c3e50",
        )
        label.pack(anchor="w", pady=2)

        text_scroll_frame = tk.Frame(frame)
        text_scroll_frame.pack(fill=tk.BOTH, expand=True)
        text_scrollbar = tk.Scrollbar(text_scroll_frame)
        text_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        text_box = tk.Text(
            text_scroll_frame,
            height=5,
            width=50,
            yscrollcommand=text_scrollbar.set,
            font=("Helvetica", 12),
            wrap=tk.WORD,
            bg="#ecf0f1",
        )
        text_box.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        # yscrollcommand only moves the thumb; the scrollbar needs a command
        # to actually scroll the text when it is dragged.
        text_scrollbar.config(command=text_box.yview)

        button_frame = tk.Frame(frame, bg="#2c3e50")
        button_frame.pack(fill=tk.X, pady=5)
        # ``l=lang`` binds the current code at definition time; a plain
        # closure would make every button use the last language of the loop.
        speak_button = tk.Button(
            button_frame,
            text="Speak",
            font=("Helvetica", 12),
            bg="#3498db",
            fg="white",
            command=lambda l=lang: self.speak_translation(l),
        )
        speak_button.pack(side=tk.LEFT, padx=10)
        play_button = tk.Button(
            button_frame,
            text="Play Latest",
            font=("Helvetica", 12),
            bg="#e67e22",
            fg="white",
            command=lambda l=lang: self.play_latest_translation(l),
        )
        play_button.pack(side=tk.LEFT, padx=10)

        self.translated_frames[lang] = {"text_box": text_box, "latest_translation": ""}
Create sections to display translations for each target language, with options to speak the whole text box or play only the latest translation.
Step 11: Setup Translation Service
def setup_translation_service(self):
    """Configure Azure speech translation and create the recognizer.

    Reads ``self.target_languages`` and sets
    ``self.speech_translation_config``, ``self.audio_config`` and
    ``self.translation_recognizer``. The recognition language is fixed to
    "en-US" and audio comes from the default microphone.

    Replace 'YOUR_SUBSCRIPTION_KEY' and 'YOUR_REGION' with the values of
    your own Speech resource.
    """
    self.speech_translation_config = speechsdk.translation.SpeechTranslationConfig(
        subscription='YOUR_SUBSCRIPTION_KEY', region='YOUR_REGION'
    )
    self.speech_translation_config.speech_recognition_language = "en-US"
    # Every target language must be registered on the config; otherwise the
    # recognition results carry no translations at all.
    for lang in self.target_languages:
        self.speech_translation_config.add_target_language(lang)
    self.audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    self.translation_recognizer = speechsdk.translation.TranslationRecognizer(
        translation_config=self.speech_translation_config,
        audio_config=self.audio_config,
    )
Setup the Azure Cognitive Services speech translation configuration and recognizer.
Step 12: Toggle Listening Method
def toggle_listening(self):
    """Switch between listening and idle, updating the button to match.

    When idle, the button becomes "Stop Listening" (red) and continuous
    recognition is started. When listening, the button returns to
    "Start Listening" (green) and continuous recognition is stopped.
    """
    if not self.is_listening:
        self.start_button.config(text="Stop Listening", bg="#e74c3c")
        self.is_listening = True
        self.recognize_continuous()
    else:
        # Without this ``else`` the state set above was reverted at once and
        # the button could never leave the "Start Listening" state.
        self.start_button.config(text="Start Listening", bg="#1abc9c")
        self.is_listening = False
        # Stop on a worker thread so the UI thread is never blocked while
        # the recognizer shuts down.
        threading.Thread(
            target=self.translation_recognizer.stop_continuous_recognition,
            daemon=True,
        ).start()
Define a method to start/stop listening and handle the button text and color changes.
Step 13: Recognize Continuous Method
def recognize_continuous(self):
    """Start continuous speech recognition in a separate thread.

    Results are delivered to ``on_recognized``. The method returns
    immediately; the recognizer is started on a daemon thread so the caller
    (normally the UI thread) is not blocked.
    """
    # Connect the handlers only once: this method runs every time listening
    # is switched on, and repeated connects would deliver each result
    # several times.
    if not getattr(self, "_recognition_events_connected", False):
        self.translation_recognizer.recognized.connect(self.on_recognized)
        # NOTE(review): on_recognized has a Canceled branch, so cancellation
        # events are routed to it as well - confirm this is intended.
        self.translation_recognizer.canceled.connect(self.on_recognized)
        self._recognition_events_connected = True

    threading.Thread(
        target=self.translation_recognizer.start_continuous_recognition,
        daemon=True,
    ).start()
Define a method to start continuous speech recognition in a separate thread.
Step 14: Handle Recognized Speech
def on_recognized(self, evt):
    """Display a recognition event and speak its translations.

    Args:
        evt: Event object whose ``result`` carries ``reason``, ``text``,
            ``translations`` and, when canceled, ``cancellation_details``.

    For translated speech, the recognized text is appended to the output box.
    Then, for every target language whose checkbox is ticked, the translation
    is appended to that language's text box, remembered as the latest
    translation and spoken. No-match and cancellation results are reported
    in the output box; a cancellation caused by an error also opens an error
    dialog.
    """
    result = evt.result
    reason = result.reason

    if reason == speechsdk.ResultReason.TranslatedSpeech:
        self.output_text.insert(tk.END, f"Recognized: {result.text}\n")
        for lang in self.target_languages:
            if not self.languages[lang].get():
                continue  # language unticked by the user
            translation = result.translations.get(lang, "")
            pane = self.translated_frames[lang]
            pane["text_box"].insert(tk.END, f"{translation}\n")
            pane["latest_translation"] = translation
            self.speak_translation_text(lang, translation)
        return

    if reason == speechsdk.ResultReason.NoMatch:
        self.output_text.insert(tk.END, "No speech could be recognized.\n")
        return

    if reason != speechsdk.ResultReason.Canceled:
        return

    details = result.cancellation_details
    self.output_text.insert(tk.END, f"Speech Recognition canceled: {details.reason}\n")
    if details.reason == speechsdk.CancellationReason.Error:
        self.output_text.insert(tk.END, f"Error details: {details.error_details}\n")
        messagebox.showerror("Error", "Did you set the speech resource key and region values?")
Define a method to handle recognized speech events and display the recognized text and translations.
Step 15: Speak Translation Text Method
def speak_translation_text(self, lang, text):
    """Speak ``text`` through the default speaker with a voice for ``lang``.

    Args:
        lang: Language code; "hi", "ta", "te" and "en" have a dedicated
            neural voice. For any other code no voice name is set.
        text: Text to synthesize. Nothing happens when it is empty.

    Replace 'YOUR_SUBSCRIPTION_KEY' and 'YOUR_REGION' with the values of
    your own Speech resource.
    """
    if not text:
        return

    voices = {
        "hi": "hi-IN-MadhurNeural",
        "ta": "ta-IN-PallaviNeural",
        "te": "te-IN-MohanNeural",
        "en": "en-US-JennyNeural",
    }
    tts_config = speechsdk.SpeechConfig(subscription='YOUR_SUBSCRIPTION_KEY', region='YOUR_REGION')
    # Choose the voice BEFORE building the synthesizer so that the
    # synthesizer is created from a fully configured SpeechConfig.
    voice = voices.get(lang)
    if voice:
        tts_config.speech_synthesis_voice_name = voice

    audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=tts_config, audio_config=audio_config)
    # Actually synthesize the text; .get() waits for the request to finish,
    # which also keeps the synthesizer alive while it is speaking.
    synthesizer.speak_text_async(text).get()
Define a method to speak the translated text using Azure Cognitive Services Text-to-Speech.
Step 16: Speak Translation Method
def speak_translation(self, lang):
    """Speak everything currently shown in the text box of ``lang``.

    Args:
        lang: Language code used as key into ``self.translated_frames``.
    """
    text_box = self.translated_frames[lang]["text_box"]
    contents = text_box.get(1.0, tk.END)
    # Surrounding whitespace is stripped before the text is handed on.
    self.speak_translation_text(lang, contents.strip())
Define a method to speak the translation from the text box.
Step 17: Play Latest Translation Method
def play_latest_translation(self, lang):
    """Speak only the most recent translation stored for ``lang``.

    Args:
        lang: Language code used as key into ``self.translated_frames``.
    """
    pane = self.translated_frames[lang]
    self.speak_translation_text(lang, pane["latest_translation"])
Define a method to play the latest translation.
Step 18: Handle Frame Configuration for Scrolling
def on_frame_configure(self, event):
    """Configure the canvas scroll region to cover everything on the canvas.

    Args:
        event: The ``<Configure>`` event from Tk; unused.
    """
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))
Define a method to configure the scroll region of the canvas.
Step 19: Run the Application
if __name__ == "__main__":
    # Create the main window and the application, then hand control to Tk.
    root = tk.Tk()
    app = SpeechTranslatorApp(root)
    # Without the event loop the script would end right after construction.
    root.mainloop()
Initialize and run the Tkinter application.
I'm looking forward to any feedback and suggestions from the community. Feel free to reach out if you have any questions or want to know more about the implementation details.
